Browse Source

Assorted cleanup, linter fixes, and improvements for Python 3

tags/v0.6
Ben Kurtovic 3 years ago
parent
commit
1c983d3738
52 changed files with 440 additions and 376 deletions
  1. +8
    -0
      docs/api/mwparserfromhell.nodes.rst
  2. +7
    -0
      docs/api/mwparserfromhell.parser.rst
  3. +1
    -13
      docs/api/mwparserfromhell.rst
  4. +30
    -0
      docs/api/mwparserfromhell.smart_list.rst
  5. +1
    -1
      docs/conf.py
  6. +2
    -2
      mwparserfromhell/__init__.py
  7. +1
    -1
      mwparserfromhell/definitions.py
  8. +5
    -36
      mwparserfromhell/nodes/__init__.py
  9. +51
    -0
      mwparserfromhell/nodes/_base.py
  10. +3
    -3
      mwparserfromhell/nodes/argument.py
  11. +3
    -3
      mwparserfromhell/nodes/comment.py
  12. +4
    -3
      mwparserfromhell/nodes/external_link.py
  13. +2
    -2
      mwparserfromhell/nodes/extras/attribute.py
  14. +2
    -2
      mwparserfromhell/nodes/extras/parameter.py
  15. +3
    -3
      mwparserfromhell/nodes/heading.py
  16. +13
    -12
      mwparserfromhell/nodes/html_entity.py
  17. +6
    -7
      mwparserfromhell/nodes/tag.py
  18. +13
    -11
      mwparserfromhell/nodes/template.py
  19. +3
    -3
      mwparserfromhell/nodes/text.py
  20. +3
    -3
      mwparserfromhell/nodes/wikilink.py
  21. +2
    -14
      mwparserfromhell/parser/__init__.py
  22. +6
    -7
      mwparserfromhell/parser/builder.py
  23. +1
    -1
      mwparserfromhell/parser/ctokenizer/definitions.c
  24. +34
    -0
      mwparserfromhell/parser/errors.py
  25. +74
    -72
      mwparserfromhell/parser/tokenizer.py
  26. +1
    -2
      mwparserfromhell/parser/tokens.py
  27. +4
    -3
      mwparserfromhell/smart_list/__init__.py
  28. +13
    -16
      mwparserfromhell/smart_list/list_proxy.py
  29. +10
    -8
      mwparserfromhell/smart_list/smart_list.py
  30. +22
    -25
      mwparserfromhell/string_mixin.py
  31. +20
    -21
      mwparserfromhell/utils.py
  32. +16
    -16
      mwparserfromhell/wikicode.py
  33. +7
    -11
      scripts/memtest.py
  34. +1
    -1
      setup.py
  35. +4
    -5
      tests/_test_tokenizer.py
  36. +1
    -2
      tests/_test_tree_equality.py
  37. +3
    -3
      tests/test_argument.py
  38. +3
    -3
      tests/test_attribute.py
  39. +3
    -3
      tests/test_comment.py
  40. +4
    -4
      tests/test_docs.py
  41. +3
    -3
      tests/test_external_link.py
  42. +3
    -3
      tests/test_heading.py
  43. +4
    -4
      tests/test_html_entity.py
  44. +4
    -5
      tests/test_parameter.py
  45. +12
    -11
      tests/test_smart_list.py
  46. +3
    -3
      tests/test_string_mixin.py
  47. +4
    -4
      tests/test_tag.py
  48. +3
    -3
      tests/test_template.py
  49. +3
    -3
      tests/test_text.py
  50. +4
    -7
      tests/test_tokens.py
  51. +4
    -5
      tests/test_wikicode.py
  52. +3
    -3
      tests/test_wikilink.py

+ 8
- 0
docs/api/mwparserfromhell.nodes.rst View File

@@ -9,6 +9,14 @@ nodes Package
.. autoclass:: mwparserfromhell.nodes.Node
:special-members:

:mod:`_base` Module
----------------------

.. automodule:: mwparserfromhell.nodes._base
:members:
:undoc-members:
:show-inheritance:

:mod:`argument` Module
----------------------



+ 7
- 0
docs/api/mwparserfromhell.parser.rst View File

@@ -23,6 +23,13 @@ parser Package
:members:
:undoc-members:

:mod:`errors` Module
--------------------

.. automodule:: mwparserfromhell.parser.errors
:members:
:undoc-members:

:mod:`tokenizer` Module
-----------------------



+ 1
- 13
docs/api/mwparserfromhell.rst View File

@@ -8,27 +8,12 @@ mwparserfromhell Package
:members:
:undoc-members:

:mod:`compat` Module

.. automodule:: mwparserfromhell.compat
:members:
:undoc-members:

:mod:`definitions` Module
-------------------------

.. automodule:: mwparserfromhell.definitions
:members:

:mod:`smart_list` Module

.. automodule:: mwparserfromhell.smart_list
:members: SmartList, _ListProxy
:undoc-members:
:show-inheritance:

:mod:`string_mixin` Module
--------------------------

@@ -58,3 +43,4 @@ Subpackages

mwparserfromhell.nodes
mwparserfromhell.parser
mwparserfromhell.smart_list

+ 30
- 0
docs/api/mwparserfromhell.smart_list.rst View File

@@ -0,0 +1,30 @@
smart_list Package
==================

:mod:`smart_list` Package
-------------------------

.. automodule:: mwparserfromhell.smart_list
:members:
:undoc-members:

:mod:`list_proxy` Module
------------------------

.. automodule:: mwparserfromhell.smart_list.list_proxy
:members:
:undoc-members:

:mod:`smart_list` Module
------------------------

.. automodule:: mwparserfromhell.smart_list.smart_list
:members:
:undoc-members:

:mod:`utils` Module
---------------------

.. automodule:: mwparserfromhell.smart_list.utils
:members:
:undoc-members:

+ 1
- 1
docs/conf.py View File

@@ -42,7 +42,7 @@ master_doc = 'index'

# General information about the project.
project = u'mwparserfromhell'
copyright = u'2012–2019 Ben Kurtovic'
copyright = u'2012–2020 Ben Kurtovic'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the


+ 2
- 2
mwparserfromhell/__init__.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -26,7 +26,7 @@ outrageously powerful parser for `MediaWiki <https://www.mediawiki.org>`_ wikico
"""

__author__ = "Ben Kurtovic"
__copyright__ = "Copyright (C) 2012-2019 Ben Kurtovic"
__copyright__ = "Copyright (C) 2012-2020 Ben Kurtovic"
__license__ = "MIT License"
__version__ = "0.6.dev0"
__email__ = "ben.kurtovic@gmail.com"


+ 1
- 1
mwparserfromhell/definitions.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal


+ 5
- 36
mwparserfromhell/nodes/__init__.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -28,42 +28,8 @@ the name of a :class:`.Template` is a :class:`.Wikicode` object that can
contain text or more templates.
"""


from ..string_mixin import StringMixIn

__all__ = ["Argument", "Comment", "ExternalLink", "HTMLEntity", "Heading",
"Node", "Tag", "Template", "Text", "Wikilink"]

class Node(StringMixIn):
"""Represents the base Node type, demonstrating the methods to override.

:meth:`__unicode__` must be overridden. It should return a ``unicode`` or
(``str`` in py3k) representation of the node. If the node contains
:class:`.Wikicode` objects inside of it, :meth:`__children__` should be a
generator that iterates over them. If the node is printable
(shown when the page is rendered), :meth:`__strip__` should return its
printable version, stripping out any formatting marks. It does not have to
return a string, but something that can be converted to a string with
``str()``. Finally, :meth:`__showtree__` can be overridden to build a
nice tree representation of the node, if desired, for
:meth:`~.Wikicode.get_tree`.
"""
def __unicode__(self):
raise NotImplementedError()

def __children__(self):
return
# pylint: disable=unreachable
yield # pragma: no cover (this is a generator that yields nothing)

def __strip__(self, **kwargs):
return None

def __showtree__(self, write, get, mark):
write(str(self))


from . import extras
from ._base import Node
from .text import Text
from .argument import Argument
from .comment import Comment
@@ -73,3 +39,6 @@ from .html_entity import HTMLEntity
from .tag import Tag
from .template import Template
from .wikilink import Wikilink

__all__ = ["Argument", "Comment", "ExternalLink", "HTMLEntity", "Heading",
"Node", "Tag", "Template", "Text", "Wikilink"]

+ 51
- 0
mwparserfromhell/nodes/_base.py View File

@@ -0,0 +1,51 @@
#
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from ..string_mixin import StringMixIn

__all__ = ["Node"]

class Node(StringMixIn):
    """Base type for all nodes, showing which hooks a subclass overrides.

    * :meth:`__str__` is mandatory and returns the node's ``str`` form; the
      base implementation raises :exc:`NotImplementedError`.
    * :meth:`__children__` should be a generator over any :class:`.Wikicode`
      objects held inside the node. The base version yields nothing.
    * :meth:`__strip__` should return the printable version of a node that
      is shown when the page is rendered, with formatting marks removed. The
      result need not be a string, only convertible with ``str()``. The base
      version returns ``None``.
    * :meth:`__showtree__` may be overridden to draw a nicer tree for
      :meth:`~.Wikicode.get_tree`. The base version writes ``str(self)``.
    """

    def __str__(self):
        raise NotImplementedError

    def __children__(self):
        # Delegating to an empty tuple keeps this a generator function while
        # producing no items at all.
        yield from ()

    def __strip__(self, **kwargs):
        return None

    def __showtree__(self, write, get, mark):
        # Only *write* is used here; *get* and *mark* are accepted so that
        # overriding implementations share one signature.
        text = str(self)
        write(text)

+ 3
- 3
mwparserfromhell/nodes/argument.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -20,7 +20,7 @@
# SOFTWARE.


from . import Node
from ._base import Node
from ..utils import parse_anything

__all__ = ["Argument"]
@@ -33,7 +33,7 @@ class Argument(Node):
self.name = name
self.default = default

def __unicode__(self):
def __str__(self):
start = "{{{" + str(self.name)
if self.default is not None:
return start + "|" + str(self.default) + "}}}"


+ 3
- 3
mwparserfromhell/nodes/comment.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -20,7 +20,7 @@
# SOFTWARE.


from . import Node
from ._base import Node

__all__ = ["Comment"]

@@ -31,7 +31,7 @@ class Comment(Node):
super().__init__()
self.contents = contents

def __unicode__(self):
def __str__(self):
return "<!--" + self.contents + "-->"

@property


+ 4
- 3
mwparserfromhell/nodes/external_link.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -20,7 +20,7 @@
# SOFTWARE.


from . import Node
from ._base import Node
from ..utils import parse_anything

__all__ = ["ExternalLink"]
@@ -34,7 +34,7 @@ class ExternalLink(Node):
self.title = title
self.brackets = brackets

def __unicode__(self):
def __str__(self):
if self.brackets:
if self.title is not None:
return "[" + str(self.url) + " " + str(self.title) + "]"
@@ -79,6 +79,7 @@ class ExternalLink(Node):

@url.setter
def url(self, value):
# pylint: disable=import-outside-toplevel
from ..parser import contexts
self._url = parse_anything(value, contexts.EXT_LINK_URI)



+ 2
- 2
mwparserfromhell/nodes/extras/attribute.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -44,7 +44,7 @@ class Attribute(StringMixIn):
self.pad_before_eq = pad_before_eq
self.pad_after_eq = pad_after_eq

def __unicode__(self):
def __str__(self):
result = self.pad_first + str(self.name) + self.pad_before_eq
if self.value is not None:
result += "=" + self.pad_after_eq


+ 2
- 2
mwparserfromhell/nodes/extras/parameter.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -41,7 +41,7 @@ class Parameter(StringMixIn):
self.value = value
self.showkey = showkey

def __unicode__(self):
def __str__(self):
if self.showkey:
return str(self.name) + "=" + str(self.value)
return str(self.value)


+ 3
- 3
mwparserfromhell/nodes/heading.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -20,7 +20,7 @@
# SOFTWARE.


from . import Node
from ._base import Node
from ..utils import parse_anything

__all__ = ["Heading"]
@@ -33,7 +33,7 @@ class Heading(Node):
self.title = title
self.level = level

def __unicode__(self):
def __str__(self):
return ("=" * self.level) + str(self.title) + ("=" * self.level)

def __children__(self):


+ 13
- 12
mwparserfromhell/nodes/html_entity.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -21,7 +21,7 @@

import html.entities as htmlentities

from . import Node
from ._base import Node

__all__ = ["HTMLEntity"]

@@ -49,7 +49,7 @@ class HTMLEntity(Node):
self._hexadecimal = hexadecimal
self._hex_char = hex_char

def __unicode__(self):
def __str__(self):
if self.named:
return "&{};".format(self.value)
if self.hexadecimal:
@@ -98,21 +98,22 @@ class HTMLEntity(Node):
int(newval)
except ValueError:
try:
int(newval, 16)
intval = int(newval, 16)
except ValueError:
if newval not in htmlentities.entitydefs:
raise ValueError("entity value is not a valid name")
raise ValueError(f"entity value {newval!r} is not a valid name") from None
self._named = True
self._hexadecimal = False
else:
if int(newval, 16) < 0 or int(newval, 16) > 0x10FFFF:
raise ValueError("entity value is not in range(0x110000)")
if intval < 0 or intval > 0x10FFFF:
raise ValueError(
f"entity value 0x{intval:x} is not in range(0x110000)") from None
self._named = False
self._hexadecimal = True
else:
test = int(newval, 16 if self.hexadecimal else 10)
if test < 0 or test > 0x10FFFF:
raise ValueError("entity value is not in range(0x110000)")
raise ValueError(f"entity value {test} is not in range(0x110000)")
self._named = False
self._value = newval

@@ -120,13 +121,13 @@ class HTMLEntity(Node):
def named(self, newval):
newval = bool(newval)
if newval and self.value not in htmlentities.entitydefs:
raise ValueError("entity value is not a valid name")
raise ValueError(f"entity value {self.value!r} is not a valid name")
if not newval:
try:
int(self.value, 16)
except ValueError:
err = "current entity value is not a valid Unicode codepoint"
raise ValueError(err)
except ValueError as exc:
raise ValueError(f"current entity value {self.value!r} "
f"is not a valid Unicode codepoint") from exc
self._named = newval

@hexadecimal.setter


+ 6
- 7
mwparserfromhell/nodes/tag.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -20,7 +20,7 @@
# SOFTWARE.


from . import Node
from ._base import Node
from .extras import Attribute
from ..definitions import is_visible
from ..utils import parse_anything
@@ -50,7 +50,7 @@ class Tag(Node):
if closing_wiki_markup is not None:
self.closing_wiki_markup = closing_wiki_markup

def __unicode__(self):
def __str__(self):
if self.wiki_markup:
if self.attributes:
attrs = "".join([str(attr) for attr in self.attributes])
@@ -60,10 +60,9 @@ class Tag(Node):
separator = self.wiki_style_separator or ""
if self.self_closing:
return self.wiki_markup + attrs + padding + separator
else:
close = self.closing_wiki_markup or ""
return self.wiki_markup + attrs + padding + separator + \
str(self.contents) + close
close = self.closing_wiki_markup or ""
return self.wiki_markup + attrs + padding + separator + \
str(self.contents) + close

result = ("</" if self.invalid else "<") + str(self.tag)
if self.attributes:


+ 13
- 11
mwparserfromhell/nodes/template.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -22,7 +22,9 @@
from collections import defaultdict
import re

from . import HTMLEntity, Node, Text
from ._base import Node
from .html_entity import HTMLEntity
from .text import Text
from .extras import Parameter
from ..utils import parse_anything

@@ -43,12 +45,11 @@ class Template(Node):
else:
self._params = []

def __unicode__(self):
def __str__(self):
if self.params:
params = "|".join([str(param) for param in self.params])
return "{{" + str(self.name) + "|" + params + "}}"
else:
return "{{" + str(self.name) + "}}"
return "{{" + str(self.name) + "}}"

def __children__(self):
yield self.name
@@ -102,6 +103,7 @@ class Template(Node):
confidence = float(best) / sum(values)
if confidence > 0.5:
return tuple(theories.keys())[values.index(best)]
return None

@staticmethod
def _blank_param_value(value):
@@ -229,8 +231,7 @@ class Template(Node):
return param
if default is _UNSET:
raise ValueError(name)
else:
return default
return default

def __getitem__(self, name):
return self.get(name)
@@ -339,19 +340,20 @@ class Template(Node):
hidden name, if it exists, or the first instance).
"""
if isinstance(param, Parameter):
return self._remove_exact(param, keep_field)
self._remove_exact(param, keep_field)
return

name = str(param).strip()
removed = False
to_remove = []

for i, param in enumerate(self.params):
if param.name.strip() == name:
for i, par in enumerate(self.params):
if par.name.strip() == name:
if keep_field:
if self._should_remove(i, name):
to_remove.append(i)
else:
self._blank_param_value(param.value)
self._blank_param_value(par.value)
keep_field = False
else:
self._fix_dependendent_params(i)


+ 3
- 3
mwparserfromhell/nodes/text.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -20,7 +20,7 @@
# SOFTWARE.


from . import Node
from ._base import Node

__all__ = ["Text"]

@@ -31,7 +31,7 @@ class Text(Node):
super().__init__()
self.value = value

def __unicode__(self):
def __str__(self):
return self.value

def __strip__(self, **kwargs):


+ 3
- 3
mwparserfromhell/nodes/wikilink.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -20,7 +20,7 @@
# SOFTWARE.


from . import Node
from ._base import Node
from ..utils import parse_anything

__all__ = ["Wikilink"]
@@ -33,7 +33,7 @@ class Wikilink(Node):
self.title = title
self.text = text

def __unicode__(self):
def __str__(self):
if self.text is not None:
return "[[" + str(self.title) + "|" + str(self.text) + "]]"
return "[[" + str(self.title) + "]]"


+ 2
- 14
mwparserfromhell/parser/__init__.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -25,20 +25,8 @@ modules: the :mod:`.tokenizer` and the :mod:`.builder`. This module joins them
together into one interface.
"""

class ParserError(Exception):
"""Exception raised when an internal error occurs while parsing.

This does not mean that the wikicode was invalid, because invalid markup
should still be parsed correctly. This means that the parser caught itself
with an impossible internal state and is bailing out before other problems
can happen. Its appearance indicates a bug.
"""
def __init__(self, extra):
msg = "This is a bug and should be reported. Info: {}.".format(extra)
super().__init__(msg)


from .builder import Builder
from .errors import ParserError
try:
from ._tokenizer import CTokenizer
use_c = True


+ 6
- 7
mwparserfromhell/parser/builder.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -20,7 +20,8 @@
# SOFTWARE.


from . import tokens, ParserError
from . import tokens
from .errors import ParserError
from ..nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity, Tag,
Template, Text, Wikilink)
from ..nodes.extras import Attribute, Parameter
@@ -198,8 +199,7 @@ class Builder:
if isinstance(token, tokens.HeadingEnd):
title = self._pop()
return Heading(title, level)
else:
self._write(self._handle_token(token))
self._write(self._handle_token(token))
raise ParserError("_handle_heading() missed a close token")

@_add_handler(tokens.CommentStart)
@@ -211,8 +211,7 @@ class Builder:
if isinstance(token, tokens.CommentEnd):
contents = self._pop()
return Comment(contents)
else:
self._write(self._handle_token(token))
self._write(self._handle_token(token))
raise ParserError("_handle_comment() missed a close token")

def _handle_attribute(self, start):
@@ -283,7 +282,7 @@ class Builder:
return _HANDLERS[type(token)](self, token)
except KeyError:
err = "_handle_token() got unexpected {0}"
raise ParserError(err.format(type(token).__name__))
raise ParserError(err.format(type(token).__name__)) from None

def build(self, tokenlist):
"""Build a Wikicode object from a list tokens and return it."""


+ 1
- 1
mwparserfromhell/parser/ctokenizer/definitions.c View File

@@ -1,5 +1,5 @@
/*
Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in


+ 34
- 0
mwparserfromhell/parser/errors.py View File

@@ -0,0 +1,34 @@
#
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

__all__ = ["ParserError"]

class ParserError(Exception):
    """Exception raised when an internal error occurs while parsing.

    This does not mean that the wikicode was invalid, because invalid markup
    should still be parsed correctly. This means that the parser caught itself
    with an impossible internal state and is bailing out before other problems
    can happen. Its appearance indicates a bug.

    :param extra: detail describing the internal state that was hit; it is
        embedded in the exception message and kept on the instance as
        ``extra``.
    """

    def __init__(self, extra):
        # Keep the raw detail so the exception can be rebuilt from it later
        # (see __reduce__) instead of from the already-wrapped message.
        self.extra = extra
        super().__init__(
            f"This is a bug and should be reported. Info: {extra}.")

    def __reduce__(self):
        # The default BaseException.__reduce__ re-invokes the class with
        # ``self.args``, i.e. with the wrapped message, which would wrap the
        # text a second time whenever the exception is copied or pickled.
        # Rebuild from the original detail and restore instance attributes.
        return (type(self), (self.extra,), dict(vars(self)))

+ 74
- 72
mwparserfromhell/parser/tokenizer.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -23,7 +23,8 @@ import html.entities as htmlentities
from math import log
import re

from . import contexts, tokens, ParserError
from . import contexts, tokens
from .errors import ParserError
from ..definitions import (get_html_tag, is_parsable, is_single,
is_single_only, is_scheme)

@@ -323,7 +324,7 @@ class Tokenizer:
self._head += 2
try:
# If the wikilink looks like an external link, parse it as such:
link, extra, delta = self._really_parse_external_link(True)
link, _extra, _delta = self._really_parse_external_link(True)
except BadRoute:
self._head = reset + 1
try:
@@ -433,17 +434,17 @@ class Tokenizer:
self._emit_text(this)
return punct, tail

def _is_free_link_end(self, this, next):
def _is_free_link_end(self, this, nxt):
"""Return whether the current head is the end of a free link."""
# Built from _parse()'s end sentinels:
after, ctx = self._read(2), self._context
equal_sign_contexts = contexts.TEMPLATE_PARAM_KEY | contexts.HEADING
return (this in (self.END, "\n", "[", "]", "<", ">") or
this == next == "'" or
this == nxt == "'" or
(this == "|" and ctx & contexts.TEMPLATE) or
(this == "=" and ctx & equal_sign_contexts) or
(this == next == "}" and ctx & contexts.TEMPLATE) or
(this == next == after == "}" and ctx & contexts.ARGUMENT))
(this == nxt == "}" and ctx & contexts.TEMPLATE) or
(this == nxt == after == "}" and ctx & contexts.ARGUMENT))

def _really_parse_external_link(self, brackets):
"""Really parse an external link."""
@@ -458,23 +459,23 @@ class Tokenizer:
self._fail_route()
tail = ""
while True:
this, next = self._read(), self._read(1)
this, nxt = self._read(), self._read(1)
if this == "&":
if tail:
self._emit_text(tail)
tail = ""
self._parse_entity()
elif (this == "<" and next == "!" and self._read(2) ==
elif (this == "<" and nxt == "!" and self._read(2) ==
self._read(3) == "-"):
if tail:
self._emit_text(tail)
tail = ""
self._parse_comment()
elif not brackets and self._is_free_link_end(this, next):
elif not brackets and self._is_free_link_end(this, nxt):
return self._pop(), tail, -1
elif this is self.END or this == "\n":
self._fail_route()
elif this == next == "{" and self._can_recurse():
elif this == nxt == "{" and self._can_recurse():
if tail:
self._emit_text(tail)
tail = ""
@@ -702,12 +703,12 @@ class Tokenizer:

def _handle_tag_text(self, text):
"""Handle regular *text* inside of an HTML open tag."""
next = self._read(1)
nxt = self._read(1)
if not self._can_recurse() or text not in self.MARKERS:
self._emit_text(text)
elif text == next == "{":
elif text == nxt == "{":
self._parse_template_or_argument()
elif text == next == "[":
elif text == nxt == "[":
self._parse_wikilink()
elif text == "<":
self._parse_tag()
@@ -796,10 +797,10 @@ class Tokenizer:
"""Handle the body of an HTML tag that is parser-blacklisted."""
strip = lambda text: text.rstrip().lower()
while True:
this, next = self._read(), self._read(1)
this, nxt = self._read(), self._read(1)
if this is self.END:
self._fail_route()
elif this == "<" and next == "/":
elif this == "<" and nxt == "/":
self._head += 3
if self._read() != ">" or (strip(self._read(-1)) !=
strip(self._stack[1].text)):
@@ -854,7 +855,7 @@ class Tokenizer:
self._push(contexts.TAG_OPEN)
self._emit(tokens.TagOpenOpen())
while True:
this, next = self._read(), self._read(1)
this, nxt = self._read(), self._read(1)
can_exit = (not data.context & (data.CX_QUOTED | data.CX_NAME) or
data.context & data.CX_NOTE_SPACE)
if this is self.END:
@@ -876,7 +877,7 @@ class Tokenizer:
if is_parsable(self._stack[1].text):
return self._parse(push=False)
return self._handle_blacklisted_tag()
elif this == "/" and next == ">" and can_exit:
elif this == "/" and nxt == ">" and can_exit:
self._handle_tag_close_open(data, tokens.TagCloseSelfclose)
return self._pop()
else:
@@ -933,9 +934,11 @@ class Tokenizer:
stack = self._parse(new_ctx)
except BadRoute:
self._head = reset
return self._emit_text("''")
self._emit_text("''")
return
else:
return self._emit_text("''")
self._emit_text("''")
return
self._emit_style_tag("i", "''", stack)

def _parse_bold(self):
@@ -948,7 +951,7 @@ class Tokenizer:
if self._context & contexts.STYLE_SECOND_PASS:
self._emit_text("'")
return True
elif self._context & contexts.STYLE_ITALICS:
if self._context & contexts.STYLE_ITALICS:
self._context |= contexts.STYLE_PASS_AGAIN
self._emit_text("'''")
else:
@@ -956,6 +959,7 @@ class Tokenizer:
self._parse_italics()
else:
self._emit_style_tag("b", "'''", stack)
return False

def _parse_italics_and_bold(self):
"""Parse wiki-style italics and bold together (i.e., five ticks)."""
@@ -1017,7 +1021,7 @@ class Tokenizer:
if ticks == 5:
self._head -= 3 if italics else 2
return self._pop()
elif not self._can_recurse():
if not self._can_recurse():
if ticks == 3:
if self._context & contexts.STYLE_SECOND_PASS:
self._emit_text("'")
@@ -1101,7 +1105,7 @@ class Tokenizer:
if this.isspace():
data.padding_buffer["first"] += this
return data.padding_buffer["first"]
elif this is self.END or this == end_token:
if this is self.END or this == end_token:
if self._context & contexts.TAG_ATTR:
if data.context & data.CX_QUOTED:
# Unclosed attribute quote: reset, don't die
@@ -1241,9 +1245,9 @@ class Tokenizer:
if context & contexts.FAIL_NEXT:
return False
if context & contexts.WIKILINK_TITLE:
if this == "]" or this == "{":
if this in ("]", "{"):
self._context |= contexts.FAIL_NEXT
elif this == "\n" or this == "[" or this == "}" or this == ">":
elif this in ("\n", "[", "}", ">"):
return False
elif this == "<":
if self._read(1) == "!":
@@ -1251,16 +1255,16 @@ class Tokenizer:
else:
return False
return True
elif context & contexts.EXT_LINK_TITLE:
if context & contexts.EXT_LINK_TITLE:
return this != "\n"
elif context & contexts.TEMPLATE_NAME:
if context & contexts.TEMPLATE_NAME:
if this == "{":
self._context |= contexts.HAS_TEMPLATE | contexts.FAIL_NEXT
return True
if this == "}" or (this == "<" and self._read(1) == "!"):
self._context |= contexts.FAIL_NEXT
return True
if this == "[" or this == "]" or this == "<" or this == ">":
if this in ("[", "]", "<", ">"):
return False
if this == "|":
return True
@@ -1273,30 +1277,29 @@ class Tokenizer:
elif this is self.END or not this.isspace():
self._context |= contexts.HAS_TEXT
return True
elif context & contexts.TAG_CLOSE:
if context & contexts.TAG_CLOSE:
return this != "<"
else:
if context & contexts.FAIL_ON_EQUALS:
if this == "=":
return False
elif context & contexts.FAIL_ON_LBRACE:
if this == "{" or (self._read(-1) == self._read(-2) == "{"):
if context & contexts.TEMPLATE:
self._context |= contexts.FAIL_ON_EQUALS
else:
self._context |= contexts.FAIL_NEXT
return True
self._context ^= contexts.FAIL_ON_LBRACE
elif context & contexts.FAIL_ON_RBRACE:
if this == "}":
if context & contexts.FAIL_ON_EQUALS:
if this == "=":
return False
elif context & contexts.FAIL_ON_LBRACE:
if this == "{" or (self._read(-1) == self._read(-2) == "{"):
if context & contexts.TEMPLATE:
self._context |= contexts.FAIL_ON_EQUALS
else:
self._context |= contexts.FAIL_NEXT
return True
self._context ^= contexts.FAIL_ON_RBRACE
elif this == "{":
self._context |= contexts.FAIL_ON_LBRACE
elif this == "}":
self._context |= contexts.FAIL_ON_RBRACE
return True
return True
self._context ^= contexts.FAIL_ON_LBRACE
elif context & contexts.FAIL_ON_RBRACE:
if this == "}":
self._context |= contexts.FAIL_NEXT
return True
self._context ^= contexts.FAIL_ON_RBRACE
elif this == "{":
self._context |= contexts.FAIL_ON_LBRACE
elif this == "}":
self._context |= contexts.FAIL_ON_RBRACE
return True

def _parse(self, context=0, push=True):
"""Parse the wikicode string, using *context* for when to stop."""
@@ -1315,8 +1318,8 @@ class Tokenizer:
continue
if this is self.END:
return self._handle_end()
next = self._read(1)
if this == next == "{":
nxt = self._read(1)
if this == nxt == "{":
if self._can_recurse():
self._parse_template_or_argument()
else:
@@ -1325,23 +1328,22 @@ class Tokenizer:
self._handle_template_param()
elif this == "=" and self._context & contexts.TEMPLATE_PARAM_KEY:
self._handle_template_param_value()
elif this == next == "}" and self._context & contexts.TEMPLATE:
elif this == nxt == "}" and self._context & contexts.TEMPLATE:
return self._handle_template_end()
elif this == "|" and self._context & contexts.ARGUMENT_NAME:
self._handle_argument_separator()
elif this == next == "}" and self._context & contexts.ARGUMENT:
elif this == nxt == "}" and self._context & contexts.ARGUMENT:
if self._read(2) == "}":
return self._handle_argument_end()
else:
self._emit_text("}")
elif this == next == "[" and self._can_recurse():
self._emit_text("}")
elif this == nxt == "[" and self._can_recurse():
if not self._context & contexts.NO_WIKILINKS:
self._parse_wikilink()
else:
self._emit_text("[")
elif this == "|" and self._context & contexts.WIKILINK_TITLE:
self._handle_wikilink_separator()
elif this == next == "]" and self._context & contexts.WIKILINK:
elif this == nxt == "]" and self._context & contexts.WIKILINK:
return self._handle_wikilink_end()
elif this == "[":
self._parse_external_link(True)
@@ -1360,12 +1362,12 @@ class Tokenizer:
self._fail_route()
elif this == "&":
self._parse_entity()
elif this == "<" and next == "!":
elif this == "<" and nxt == "!":
if self._read(2) == self._read(3) == "-":
self._parse_comment()
else:
self._emit_text(this)
elif this == "<" and next == "/" and self._read(2) is not self.END:
elif this == "<" and nxt == "/" and self._read(2) is not self.END:
if self._context & contexts.TAG_BODY:
self._handle_tag_open_close()
else:
@@ -1377,14 +1379,14 @@ class Tokenizer:
self._emit_text("<")
elif this == ">" and self._context & contexts.TAG_CLOSE:
return self._handle_tag_close_close()
elif this == next == "'" and not self._skip_style_tags:
elif this == nxt == "'" and not self._skip_style_tags:
result = self._parse_style()
if result is not None:
return result
elif self._read(-1) in ("\n", self.START) and this in ("#", "*", ";", ":"):
self._handle_list()
elif self._read(-1) in ("\n", self.START) and (
this == next == self._read(2) == self._read(3) == "-"):
this == nxt == self._read(2) == self._read(3) == "-"):
self._handle_hr()
elif this in ("\n", ":") and self._context & contexts.DL_TERM:
self._handle_dl_term()
@@ -1392,7 +1394,7 @@ class Tokenizer:
# Kill potential table contexts
self._context &= ~contexts.TABLE_CELL_LINE_CONTEXTS
# Start of table parsing
elif this == "{" and next == "|" and (
elif this == "{" and nxt == "|" and (
self._read(-1) in ("\n", self.START) or
(self._read(-2) in ("\n", self.START) and self._read(-1).isspace())):
if self._can_recurse():
@@ -1400,15 +1402,15 @@ class Tokenizer:
else:
self._emit_text("{")
elif self._context & contexts.TABLE_OPEN:
if this == next == "|" and self._context & contexts.TABLE_TD_LINE:
if this == nxt == "|" and self._context & contexts.TABLE_TD_LINE:
if self._context & contexts.TABLE_CELL_OPEN:
return self._handle_table_cell_end()
self._handle_table_cell("||", "td", contexts.TABLE_TD_LINE)
elif this == next == "|" and self._context & contexts.TABLE_TH_LINE:
elif this == nxt == "|" and self._context & contexts.TABLE_TH_LINE:
if self._context & contexts.TABLE_CELL_OPEN:
return self._handle_table_cell_end()
self._handle_table_cell("||", "th", contexts.TABLE_TH_LINE)
elif this == next == "!" and self._context & contexts.TABLE_TH_LINE:
elif this == nxt == "!" and self._context & contexts.TABLE_TH_LINE:
if self._context & contexts.TABLE_CELL_OPEN:
return self._handle_table_cell_end()
self._handle_table_cell("!!", "th", contexts.TABLE_TH_LINE)
@@ -1420,13 +1422,13 @@ class Tokenizer:
self._emit_text(this)
elif (self._read(-1) in ("\n", self.START) or
(self._read(-2) in ("\n", self.START) and self._read(-1).isspace())):
if this == "|" and next == "}":
if this == "|" and nxt == "}":
if self._context & contexts.TABLE_CELL_OPEN:
return self._handle_table_cell_end()
if self._context & contexts.TABLE_ROW_OPEN:
return self._handle_table_row_end()
return self._handle_table_end()
elif this == "|" and next == "-":
if this == "|" and nxt == "-":
if self._context & contexts.TABLE_CELL_OPEN:
return self._handle_table_cell_end()
if self._context & contexts.TABLE_ROW_OPEN:
@@ -1458,10 +1460,10 @@ class Tokenizer:
self._skip_style_tags = skip_style_tags

try:
tokens = self._parse(context)
except BadRoute: # pragma: no cover (untestable/exceptional case)
raise ParserError("Python tokenizer exited with BadRoute")
result = self._parse(context)
except BadRoute as exc: # pragma: no cover (untestable/exceptional case)
raise ParserError("Python tokenizer exited with BadRoute") from exc
if self._stacks: # pragma: no cover (untestable/exceptional case)
err = "Python tokenizer exited with non-empty token stack"
raise ParserError(err)
return tokens
return result

+ 1
- 2
mwparserfromhell/parser/tokens.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -27,7 +27,6 @@ a syntactically valid form by the :class:`.Tokenizer`, and then converted into
the :class:`.Wikicode` tree by the :class:`.Builder`.
"""


__all__ = ["Token"]

class Token(dict):


+ 4
- 3
mwparserfromhell/smart_list/__init__.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2019-2020 Yuri Astrakhan <YuriAstrakhan@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -22,8 +22,9 @@

"""
This module contains the :class:`.SmartList` type, as well as its
:class:`._ListProxy` child, which together implement a list whose sublists
:class:`.ListProxy` child, which together implement a list whose sublists
reflect changes made to the main list, and vice-versa.
"""

from .SmartList import SmartList
from .list_proxy import ListProxy as _ListProxy
from .smart_list import SmartList

mwparserfromhell/smart_list/ListProxy.py → mwparserfromhell/smart_list/list_proxy.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2019-2020 Yuri Astrakhan <YuriAstrakhan@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -20,12 +20,10 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# SmartList has to be a full import in order to avoid cyclical import errors
import mwparserfromhell.smart_list.SmartList
from .utils import _SliceNormalizerMixIn, inheritdoc


class _ListProxy(_SliceNormalizerMixIn, list):
class ListProxy(_SliceNormalizerMixIn, list):
"""Implement the ``list`` interface by getting elements from a parent.

This is created by a :class:`.SmartList` object when slicing. It does not
@@ -42,32 +40,32 @@ class _ListProxy(_SliceNormalizerMixIn, list):
return repr(self._render())

def __lt__(self, other):
if isinstance(other, _ListProxy):
if isinstance(other, ListProxy):
return self._render() < list(other)
return self._render() < other

def __le__(self, other):
if isinstance(other, _ListProxy):
if isinstance(other, ListProxy):
return self._render() <= list(other)
return self._render() <= other

def __eq__(self, other):
if isinstance(other, _ListProxy):
if isinstance(other, ListProxy):
return self._render() == list(other)
return self._render() == other

def __ne__(self, other):
if isinstance(other, _ListProxy):
if isinstance(other, ListProxy):
return self._render() != list(other)
return self._render() != other

def __gt__(self, other):
if isinstance(other, _ListProxy):
if isinstance(other, ListProxy):
return self._render() > list(other)
return self._render() > other

def __ge__(self, other):
if isinstance(other, _ListProxy):
if isinstance(other, ListProxy):
return self._render() >= list(other)
return self._render() >= other

@@ -84,8 +82,7 @@ class _ListProxy(_SliceNormalizerMixIn, list):
keystop = min(self._start + key.stop, self._stop)
adjusted = slice(keystart, keystop, key.step)
return self._parent[adjusted]
else:
return self._render()[key]
return self._render()[key]

def __setitem__(self, key, item):
if isinstance(key, slice):
@@ -133,20 +130,20 @@ class _ListProxy(_SliceNormalizerMixIn, list):
return item in self._render()

def __add__(self, other):
return mwparserfromhell.smart_list.SmartList(list(self) + other)
return type(self._parent)(list(self) + other)

def __radd__(self, other):
return mwparserfromhell.smart_list.SmartList(other + list(self))
return type(self._parent)(other + list(self))

def __iadd__(self, other):
self.extend(other)
return self

def __mul__(self, other):
return mwparserfromhell.smart_list.SmartList(list(self) * other)
return type(self._parent)(list(self) * other)

def __rmul__(self, other):
return mwparserfromhell.smart_list.SmartList(other * list(self))
return type(self._parent)(other * list(self))

def __imul__(self, other):
self.extend(list(self) * (other - 1))

mwparserfromhell/smart_list/SmartList.py → mwparserfromhell/smart_list/smart_list.py View File

@@ -1,4 +1,5 @@
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2019-2020 Yuri Astrakhan <YuriAstrakhan@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -19,9 +20,9 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from _weakref import ref
from weakref import ref

from .ListProxy import _ListProxy
from .list_proxy import ListProxy
from .utils import _SliceNormalizerMixIn, inheritdoc


@@ -32,7 +33,7 @@ class SmartList(_SliceNormalizerMixIn, list):
list (such as the addition, removal, or replacement of elements) will be
reflected in the sublist, or vice-versa, to the greatest degree possible.
This is implemented by having sublists - instances of the
:class:`._ListProxy` type - dynamically determine their elements by storing
:class:`.ListProxy` type - dynamically determine their elements by storing
their slice info and retrieving that slice from the parent. Methods that
change the size of the list also change the slice info. For example::

@@ -61,21 +62,22 @@ class SmartList(_SliceNormalizerMixIn, list):
return super().__getitem__(key)
key = self._normalize_slice(key, clamp=False)
sliceinfo = [key.start, key.stop, key.step]
child = _ListProxy(self, sliceinfo)
child = ListProxy(self, sliceinfo)
child_ref = ref(child, self._delete_child)
self._children[id(child_ref)] = (child_ref, sliceinfo)
return child

def __setitem__(self, key, item):
if not isinstance(key, slice):
return super().__setitem__(key, item)
super().__setitem__(key, item)
return
item = list(item)
super().__setitem__(key, item)
key = self._normalize_slice(key, clamp=True)
diff = len(item) + (key.start - key.stop) // key.step
if not diff:
return
for child, (start, stop, step) in self._children.values():
for child, (start, stop, _step) in self._children.values():
if start > key.stop:
self._children[id(child)][1][0] += diff
if stop is not None and stop >= key.stop:
@@ -88,7 +90,7 @@ class SmartList(_SliceNormalizerMixIn, list):
else:
key = slice(key, key + 1, 1)
diff = (key.stop - key.start) // key.step
for child, (start, stop, step) in self._children.values():
for child, (start, stop, _step) in self._children.values():
if start > key.start:
self._children[id(child)][1][0] -= diff
if stop is not None and stop >= key.stop:

+ 22
- 25
mwparserfromhell/string_mixin.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -38,67 +38,64 @@ def inheritdoc(method):
return method

class StringMixIn:
"""Implement the interface for ``unicode``/``str`` in a dynamic manner.
"""Implement the interface for ``str`` in a dynamic manner.

To use this class, inherit from it and override the :meth:`__unicode__`
method to return the string representation of the object.
The various string methods will operate on the value of :meth:`__unicode__`
instead of the immutable ``self`` like the regular ``str`` type.
To use this class, inherit from it and override the :meth:`__str__` method
to return the string representation of the object. The various string
methods will operate on the value of :meth:`__str__` instead of the
immutable ``self`` like the regular ``str`` type.
"""

def __str__(self):
return self.__unicode__()
raise NotImplementedError()

def __bytes__(self):
return bytes(self.__unicode__(), getdefaultencoding())

def __unicode__(self):
raise NotImplementedError()
return bytes(self.__str__(), getdefaultencoding())

def __repr__(self):
return repr(self.__unicode__())
return repr(self.__str__())

def __lt__(self, other):
return self.__unicode__() < other
return self.__str__() < other

def __le__(self, other):
return self.__unicode__() <= other
return self.__str__() <= other

def __eq__(self, other):
return self.__unicode__() == other
return self.__str__() == other

def __ne__(self, other):
return self.__unicode__() != other
return self.__str__() != other

def __gt__(self, other):
return self.__unicode__() > other
return self.__str__() > other

def __ge__(self, other):
return self.__unicode__() >= other
return self.__str__() >= other

def __bool__(self):
return bool(self.__unicode__())
return bool(self.__str__())

def __len__(self):
return len(self.__unicode__())
return len(self.__str__())

def __iter__(self):
yield from self.__unicode__()
yield from self.__str__()

def __getitem__(self, key):
return self.__unicode__()[key]
return self.__str__()[key]

def __reversed__(self):
return reversed(self.__unicode__())
return reversed(self.__str__())

def __contains__(self, item):
return str(item) in self.__unicode__()
return str(item) in self.__str__()

def __getattr__(self, attr):
if not hasattr(str, attr):
raise AttributeError("{!r} object has no attribute {!r}".format(
type(self).__name__, attr))
return getattr(self.__unicode__(), attr)
return getattr(self.__str__(), attr)

maketrans = str.maketrans # Static method can't rely on __getattr__



+ 20
- 21
mwparserfromhell/utils.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -24,48 +24,47 @@ This module contains accessory functions for other parts of the library. Parser
users generally won't need stuff from here.
"""


from .nodes import Node
from .smart_list import SmartList

__all__ = ["parse_anything"]

def parse_anything(value, context=0, skip_style_tags=False):
"""Return a :class:`.Wikicode` for *value*, allowing multiple types.

This differs from :meth:`.Parser.parse` in that we accept more than just a
string to be parsed. Unicode objects (strings in py3k), strings (bytes in
py3k), integers (converted to strings), ``None``, existing :class:`.Node`
or :class:`.Wikicode` objects, as well as an iterable of these types, are
supported. This is used to parse input on-the-fly by various methods of
:class:`.Wikicode` and others like :class:`.Template`, such as
:meth:`wikicode.insert() <.Wikicode.insert>` or setting
:meth:`template.name <.Template.name>`.
string to be parsed. Strings, bytes, integers (converted to strings),
``None``, existing :class:`.Node` or :class:`.Wikicode` objects, as well
as an iterable of these types, are supported. This is used to parse input
on-the-fly by various methods of :class:`.Wikicode` and others like
:class:`.Template`, such as :meth:`wikicode.insert() <.Wikicode.insert>`
or setting :meth:`template.name <.Template.name>`.

Additional arguments are passed directly to :meth:`.Parser.parse`.
"""
# pylint: disable=cyclic-import,import-outside-toplevel
from .nodes import Node
from .parser import Parser
from .smart_list import SmartList
from .wikicode import Wikicode

if isinstance(value, Wikicode):
return value
elif isinstance(value, Node):
if isinstance(value, Node):
return Wikicode(SmartList([value]))
elif isinstance(value, str):
if isinstance(value, str):
return Parser().parse(value, context, skip_style_tags)
elif isinstance(value, bytes):
if isinstance(value, bytes):
return Parser().parse(value.decode("utf8"), context, skip_style_tags)
elif isinstance(value, int):
if isinstance(value, int):
return Parser().parse(str(value), context, skip_style_tags)
elif value is None:
if value is None:
return Wikicode(SmartList())
elif hasattr(value, "read"):
if hasattr(value, "read"):
return parse_anything(value.read(), context, skip_style_tags)
try:
nodelist = SmartList()
for item in value:
nodelist += parse_anything(item, context, skip_style_tags).nodes
return Wikicode(nodelist)
except TypeError:
error = "Needs string, Node, Wikicode, file, int, None, or iterable of these, but got {0}: {1}"
raise ValueError(error.format(type(value).__name__, value))
except TypeError as exc:
raise ValueError(f"Needs string, Node, Wikicode, file, int, None, or "
f"iterable of these, but got {type(value).__name__}: "
f"{value}") from exc

+ 16
- 16
mwparserfromhell/wikicode.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -24,7 +24,7 @@ from itertools import chain

from .nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity,
Node, Tag, Template, Text, Wikilink)
from .smart_list.ListProxy import _ListProxy
from .smart_list.list_proxy import ListProxy
from .string_mixin import StringMixIn
from .utils import parse_anything

@@ -48,7 +48,7 @@ class Wikicode(StringMixIn):
super().__init__()
self._nodes = nodes

def __unicode__(self):
def __str__(self):
return "".join([str(node) for node in self.nodes])

@staticmethod
@@ -108,7 +108,7 @@ class Wikicode(StringMixIn):
def _is_child_wikicode(self, obj, recursive=True):
"""Return whether the given :class:`.Wikicode` is a descendant."""
def deref(nodes):
if isinstance(nodes, _ListProxy):
if isinstance(nodes, ListProxy):
return nodes._parent # pylint: disable=protected-access
return nodes

@@ -249,12 +249,12 @@ class Wikicode(StringMixIn):
make_filter = lambda ftype: (lambda self, *a, **kw:
self.filter(forcetype=ftype, *a, **kw))
for name, ftype in meths.items():
ifilter = make_ifilter(ftype)
filter = make_filter(ftype)
ifilter.__doc__ = doc.format(name, "ifilter", ftype)
filter.__doc__ = doc.format(name, "filter", ftype)
setattr(cls, "ifilter_" + name, ifilter)
setattr(cls, "filter_" + name, filter)
ifilt = make_ifilter(ftype)
filt = make_filter(ftype)
ifilt.__doc__ = doc.format(name, "ifilter", ftype)
filt.__doc__ = doc.format(name, "filter", ftype)
setattr(cls, "ifilter_" + name, ifilt)
setattr(cls, "filter_" + name, filt)

@property
def nodes(self):
@@ -351,6 +351,7 @@ class Wikicode(StringMixIn):
ancestors = _get_ancestors(code, needle)
if ancestors is not None:
return [node] + ancestors
return None

if isinstance(obj, Wikicode):
obj = obj.get(0)
@@ -443,13 +444,13 @@ class Wikicode(StringMixIn):
"""
if isinstance(obj, (Node, Wikicode)):
context, index = self._do_strong_search(obj, recursive)
for i in range(index.start, index.stop):
for _ in range(index.start, index.stop):
context.nodes.pop(index.start)
context.insert(index.start, value)
else:
for exact, context, index in self._do_weak_search(obj, recursive):
if exact:
for i in range(index.start, index.stop):
for _ in range(index.start, index.stop):
context.nodes.pop(index.start)
context.insert(index.start, value)
else:
@@ -478,12 +479,12 @@ class Wikicode(StringMixIn):
"""
if isinstance(obj, (Node, Wikicode)):
context, index = self._do_strong_search(obj, recursive)
for i in range(index.start, index.stop):
for _ in range(index.start, index.stop):
context.nodes.pop(index.start)
else:
for exact, context, index in self._do_weak_search(obj, recursive):
if exact:
for i in range(index.start, index.stop):
for _ in range(index.start, index.stop):
context.nodes.pop(index.start)
else:
self._slice_replace(context, index, str(obj), "")
@@ -645,8 +646,7 @@ class Wikicode(StringMixIn):
while "\n\n\n" in stripped:
stripped = stripped.replace("\n\n\n", "\n\n")
return stripped
else:
return "".join(nodes)
return "".join(nodes)

def get_tree(self):
"""Return a hierarchical tree representation of the object.


+ 7
- 11
scripts/memtest.py View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -21,10 +21,10 @@
# SOFTWARE.

"""
Tests for memory leaks in the CTokenizer. Python 2 and 3 compatible.
Tests for memory leaks in the CTokenizer.

This appears to work mostly fine under Linux, but gives an absurd number of
false positives on OS X. I'm not sure why. Running the tests multiple times
false positives on macOS. I'm not sure why. Running the tests multiple times
yields different results (tests don't always leak, and the amount they leak by
varies). Increasing the number of loops results in a smaller bytes/loop value,
too, indicating the increase in memory usage might be due to something else.
@@ -32,7 +32,6 @@ Actual memory leaks typically leak very large amounts of memory (megabytes)
and scale with the number of loops.
"""

from __future__ import unicode_literals, print_function
from locale import LC_ALL, setlocale
from multiprocessing import Process, Pipe
from os import listdir, path
@@ -42,19 +41,16 @@ import psutil

from mwparserfromhell.parser._tokenizer import CTokenizer

if sys.version_info[0] == 2:
range = xrange

LOOPS = 10000

class Color(object):
class Color:
GRAY = "\x1b[30;1m"
GREEN = "\x1b[92m"
YELLOW = "\x1b[93m"
RESET = "\x1b[0m"


class MemoryTest(object):
class MemoryTest:
"""Manages a memory test."""

def __init__(self):
@@ -151,13 +147,13 @@ class MemoryTest(object):

def _runner(text, child):
r1, r2 = range(250), range(LOOPS)
for i in r1:
for _ in r1:
CTokenizer().tokenize(text)
child.send("OK")
child.recv()
child.send("OK")
child.recv()
for i in r2:
for _ in r2:
CTokenizer().tokenize(text)
child.send("OK")
child.recv()


+ 1
- 1
setup.py View File

@@ -1,6 +1,6 @@
#! /usr/bin/env python
#
# Copyright (C) 2012-2018 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal


+ 4
- 5
tests/_test_tokenizer.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -29,7 +29,6 @@ from mwparserfromhell.parser.builder import Builder

class _TestParseError(Exception):
"""Raised internally when a test could not be parsed."""
pass


class TokenizerTestCase:
@@ -41,7 +40,7 @@ class TokenizerTestCase:
"""

@staticmethod
def _build_test_method(funcname, data):
def _build_test_method(data):
"""Create and return a method to be treated as a test case method.

*data* is a dict containing multiple keys: the *input* text to be
@@ -79,7 +78,7 @@ class TokenizerTestCase:
try:
data["output"] = eval(raw, vars(tokens))
except Exception as err:
raise _TestParseError(err)
raise _TestParseError(err) from err

@classmethod
def _load_tests(cls, filename, name, text, restrict=None):
@@ -115,7 +114,7 @@ class TokenizerTestCase:
continue

fname = "test_{}{}_{}".format(name, number, data["name"])
meth = cls._build_test_method(fname, data)
meth = cls._build_test_method(data)
setattr(cls, fname, meth)

@classmethod


+ 1
- 2
tests/_test_tree_equality.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -23,7 +23,6 @@ from unittest import TestCase

from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity,
Tag, Template, Text, Wikilink)
from mwparserfromhell.nodes.extras import Attribute, Parameter
from mwparserfromhell.smart_list import SmartList
from mwparserfromhell.wikicode import Wikicode



+ 3
- 3
tests/test_argument.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -28,8 +28,8 @@ from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext
class TestArgument(TreeEqualityTestCase):
"""Test cases for the Argument node."""

def test_unicode(self):
"""test Argument.__unicode__()"""
def test_str(self):
"""test Argument.__str__()"""
node = Argument(wraptext("foobar"))
self.assertEqual("{{{foobar}}}", str(node))
node2 = Argument(wraptext("foo"), wraptext("bar"))


+ 3
- 3
tests/test_attribute.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -29,8 +29,8 @@ from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext
class TestAttribute(TreeEqualityTestCase):
"""Test cases for the Attribute node extra."""

def test_unicode(self):
"""test Attribute.__unicode__()"""
def test_str(self):
"""test Attribute.__str__()"""
node = Attribute(wraptext("foo"))
self.assertEqual(" foo", str(node))
node2 = Attribute(wraptext("foo"), wraptext("bar"))


+ 3
- 3
tests/test_comment.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -28,8 +28,8 @@ from ._test_tree_equality import TreeEqualityTestCase
class TestComment(TreeEqualityTestCase):
"""Test cases for the Comment node."""

def test_unicode(self):
"""test Comment.__unicode__()"""
def test_str(self):
"""test Comment.__str__()"""
node = Comment("foobar")
self.assertEqual("<!--foobar-->", str(node))



+ 4
- 4
tests/test_docs.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -31,10 +31,10 @@ import mwparserfromhell
class TestDocs(unittest.TestCase):
"""Integration test cases for mwparserfromhell's documentation."""

def assertPrint(self, input, output):
"""Assertion check that *input*, when printed, produces *output*."""
def assertPrint(self, value, output):
"""Assertion check that *value*, when printed, produces *output*."""
buff = StringIO()
print(input, end="", file=buff)
print(value, end="", file=buff)
buff.seek(0)
self.assertEqual(output, buff.read())



+ 3
- 3
tests/test_external_link.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -28,8 +28,8 @@ from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext
class TestExternalLink(TreeEqualityTestCase):
"""Test cases for the ExternalLink node."""

def test_unicode(self):
"""test ExternalLink.__unicode__()"""
def test_str(self):
"""test ExternalLink.__str__()"""
node = ExternalLink(wraptext("http://example.com/"), brackets=False)
self.assertEqual("http://example.com/", str(node))
node2 = ExternalLink(wraptext("http://example.com/"))


+ 3
- 3
tests/test_heading.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -28,8 +28,8 @@ from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext
class TestHeading(TreeEqualityTestCase):
"""Test cases for the Heading node."""

def test_unicode(self):
"""test Heading.__unicode__()"""
def test_str(self):
"""test Heading.__str__()"""
node = Heading(wraptext("foobar"), 2)
self.assertEqual("==foobar==", str(node))
node2 = Heading(wraptext(" zzz "), 5)


+ 4
- 4
tests/test_html_entity.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -23,13 +23,13 @@ import unittest

from mwparserfromhell.nodes import HTMLEntity

from ._test_tree_equality import TreeEqualityTestCase, wrap
from ._test_tree_equality import TreeEqualityTestCase

class TestHTMLEntity(TreeEqualityTestCase):
"""Test cases for the HTMLEntity node."""

def test_unicode(self):
"""test HTMLEntity.__unicode__()"""
def test_str(self):
"""test HTMLEntity.__str__()"""
node1 = HTMLEntity("nbsp", named=True, hexadecimal=False)
node2 = HTMLEntity("107", named=False, hexadecimal=False)
node3 = HTMLEntity("6b", named=False, hexadecimal=True)


+ 4
- 5
tests/test_parameter.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -21,16 +21,15 @@

import unittest

from mwparserfromhell.nodes import Text
from mwparserfromhell.nodes.extras import Parameter

from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext
from ._test_tree_equality import TreeEqualityTestCase, wraptext

class TestParameter(TreeEqualityTestCase):
"""Test cases for the Parameter node extra."""

def test_unicode(self):
"""test Parameter.__unicode__()"""
def test_str(self):
"""test Parameter.__str__()"""
node = Parameter(wraptext("1"), wraptext("foo"), showkey=False)
self.assertEqual("foo", str(node))
node2 = Parameter(wraptext("foo"), wraptext("bar"))


+ 12
- 11
tests/test_smart_list.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -22,11 +22,11 @@
import unittest

from mwparserfromhell.smart_list import SmartList
from mwparserfromhell.smart_list.ListProxy import _ListProxy
from mwparserfromhell.smart_list.list_proxy import ListProxy


class TestSmartList(unittest.TestCase):
"""Test cases for the SmartList class and its child, _ListProxy."""
"""Test cases for the SmartList class and its child, ListProxy."""

def _test_get_set_del_item(self, builder):
"""Run tests on __get/set/delitem__ of a list built with *builder*."""
@@ -178,7 +178,7 @@ class TestSmartList(unittest.TestCase):

gen1 = iter(list1)
out = []
for i in range(len(list1)):
for _ in range(len(list1)):
out.append(next(gen1))
self.assertRaises(StopIteration, next, gen1)
self.assertEqual([0, 1, 2, 3, "one", "two"], out)
@@ -260,7 +260,8 @@ class TestSmartList(unittest.TestCase):
list3.sort(key=lambda i: i[1], reverse=True)
self.assertEqual([("b", 8), ("a", 5), ("c", 3), ("d", 2)], list3)

def _dispatch_test_for_children(self, meth):
@staticmethod
def _dispatch_test_for_children(meth):
"""Run a test method on various different types of children."""
meth(lambda L: SmartList(list(L))[:])
meth(lambda L: SmartList([999] + list(L))[1:])
@@ -268,13 +269,13 @@ class TestSmartList(unittest.TestCase):
meth(lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2])

def test_docs(self):
"""make sure the methods of SmartList/_ListProxy have docstrings"""
"""make sure the methods of SmartList/ListProxy have docstrings"""
methods = ["append", "count", "extend", "index", "insert", "pop",
"remove", "reverse", "sort"]
for meth in methods:
expected = getattr(list, meth).__doc__
smartlist_doc = getattr(SmartList, meth).__doc__
listproxy_doc = getattr(_ListProxy, meth).__doc__
listproxy_doc = getattr(ListProxy, meth).__doc__
self.assertEqual(expected, smartlist_doc)
self.assertEqual(expected, listproxy_doc)

@@ -305,19 +306,19 @@ class TestSmartList(unittest.TestCase):
self._test_list_methods(SmartList)

def test_child_get_set_del(self):
"""make sure _ListProxy's getitem/setitem/delitem work"""
"""make sure ListProxy's getitem/setitem/delitem work"""
self._dispatch_test_for_children(self._test_get_set_del_item)

def test_child_add(self):
"""make sure _ListProxy's add/radd/iadd work"""
"""make sure ListProxy's add/radd/iadd work"""
self._dispatch_test_for_children(self._test_add_radd_iadd)

def test_child_other_magics(self):
"""make sure _ListProxy's other magically implemented features work"""
"""make sure ListProxy's other magically implemented features work"""
self._dispatch_test_for_children(self._test_other_magic_methods)

def test_child_methods(self):
"""make sure _ListProxy's non-magic methods work, like append()"""
"""make sure ListProxy's non-magic methods work, like append()"""
self._dispatch_test_for_children(self._test_list_methods)

def test_influence(self):


+ 3
- 3
tests/test_string_mixin.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -29,7 +29,7 @@ class _FakeString(StringMixIn):
def __init__(self, data):
self._data = data

def __unicode__(self):
def __str__(self):
return self._data


@@ -128,7 +128,7 @@ class TestStringMixIn(unittest.TestCase):
self.assertIsInstance(gen2, GeneratorType)

out = []
for i in range(len(str1)):
for _ in range(len(str1)):
out.append(next(gen1))
self.assertRaises(StopIteration, next, gen1)
self.assertEqual(expected, out)


+ 4
- 4
tests/test_tag.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -34,8 +34,8 @@ agenpnv = lambda name, a, b, c: Attribute(wraptext(name), None, '"', a, b, c)
class TestTag(TreeEqualityTestCase):
"""Test cases for the Tag node."""

def test_unicode(self):
"""test Tag.__unicode__()"""
def test_str(self):
"""test Tag.__str__()"""
node1 = Tag(wraptext("ref"))
node2 = Tag(wraptext("span"), wraptext("foo"),
[agen("style", "color: red;")])
@@ -227,7 +227,7 @@ class TestTag(TreeEqualityTestCase):
node.wiki_markup = "{"
self.assertEqual("{|\n{", node)
node2 = Tag(wraptext("table"), wraptext("\n"), wiki_style_separator="|")
self.assertEqual("|", node.wiki_style_separator)
self.assertEqual("|", node2.wiki_style_separator)

def test_closing_wiki_markup(self):
"""test getter/setter for closing_wiki_markup attribute"""


+ 3
- 3
tests/test_template.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2017 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -34,8 +34,8 @@ pgenh = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=False)
class TestTemplate(TreeEqualityTestCase):
"""Test cases for the Template node."""

def test_unicode(self):
"""test Template.__unicode__()"""
def test_str(self):
"""test Template.__str__()"""
node = Template(wraptext("foobar"))
self.assertEqual("{{foobar}}", str(node))
node2 = Template(wraptext("foo"),


+ 3
- 3
tests/test_text.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -26,8 +26,8 @@ from mwparserfromhell.nodes import Text
class TestText(unittest.TestCase):
"""Test cases for the Text node."""

def test_unicode(self):
"""test Text.__unicode__()"""
def test_str(self):
"""test Text.__str__()"""
node = Text("foobar")
self.assertEqual("foobar", str(node))
node2 = Text("fóóbar")


+ 4
- 7
tests/test_tokens.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -61,12 +61,9 @@ class TestTokens(unittest.TestCase):
hundredchars = ("earwig" * 100)[:97] + "..."

self.assertEqual("Token()", repr(token1))
token2repr1 = "Token(foo='bar', baz=123)"
token2repr2 = "Token(baz=123, foo='bar')"
token3repr = "Text(text='" + hundredchars + "')"
token2repr = repr(token2)
self.assertTrue(token2repr == token2repr1 or token2repr == token2repr2)
self.assertEqual(token3repr, repr(token3))
self.assertTrue(repr(token2) in (
"Token(foo='bar', baz=123)", "Token(baz=123, foo='bar')"))
self.assertEqual("Text(text='" + hundredchars + "')", repr(token3))

def test_equality(self):
"""check that equivalent tokens are considered equal"""


+ 4
- 5
tests/test_wikicode.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -24,8 +24,7 @@ import re
from types import GeneratorType
import unittest

from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity,
Node, Tag, Template, Text, Wikilink)
from mwparserfromhell.nodes import Argument, Heading, Template, Text
from mwparserfromhell.smart_list import SmartList
from mwparserfromhell.wikicode import Wikicode
from mwparserfromhell import parse
@@ -35,8 +34,8 @@ from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext
class TestWikicode(TreeEqualityTestCase):
"""Tests for the Wikicode class, which manages a list of nodes."""

def test_unicode(self):
"""test Wikicode.__unicode__()"""
def test_str(self):
"""test Wikicode.__str__()"""
code1 = parse("foobar")
code2 = parse("Have a {{template}} and a [[page|link]]")
self.assertEqual("foobar", str(code1))


+ 3
- 3
tests/test_wikilink.py View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -28,8 +28,8 @@ from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext
class TestWikilink(TreeEqualityTestCase):
"""Test cases for the Wikilink node."""

def test_unicode(self):
"""test Wikilink.__unicode__()"""
def test_str(self):
"""test Wikilink.__str__()"""
node = Wikilink(wraptext("foobar"))
self.assertEqual("[[foobar]]", str(node))
node2 = Wikilink(wraptext("foo"), wraptext("bar"))


Loading…
Cancel
Save