@@ -1,3 +1,10 @@ | |||
v0.5.3 (released March 30, 2019): | |||
- Fixed manual construction of Node objects, previously unsupported. (#214) | |||
- Fixed Wikicode transformation methods (replace(), remove(), etc.) when passed | |||
an empty section as an argument. (#212) | |||
- Fixed the parser getting stuck inside malformed tables. (#206) | |||
v0.5.2 (released November 1, 2018): | |||
- Dropped support for end-of-life Python versions 2.6, 3.2, 3.3. (#199, #204) | |||
@@ -189,11 +189,13 @@ Python 3 code (via the API_): | |||
API_URL = "https://en.wikipedia.org/w/api.php" | |||
def parse(title): | |||
data = {"action": "query", "prop": "revisions", "rvlimit": 1, | |||
"rvprop": "content", "format": "json", "titles": title} | |||
data = {"action": "query", "prop": "revisions", "rvprop": "content", | |||
"rvslots": "main", "rvlimit": 1, "titles": title, | |||
"format": "json", "formatversion": "2"} | |||
raw = urlopen(API_URL, urlencode(data).encode()).read() | |||
res = json.loads(raw) | |||
text = list(res["query"]["pages"].values())[0]["revisions"][0]["*"] | |||
revision = res["query"]["pages"][0]["revisions"][0] | |||
text = revision["slots"]["main"]["content"] | |||
return mwparserfromhell.parse(text) | |||
.. _MediaWiki: http://mediawiki.org | |||
@@ -1,6 +1,6 @@ | |||
# This config file is used by appveyor.com to build Windows release binaries | |||
version: 0.5.2-b{build} | |||
version: 0.5.3-b{build} | |||
branches: | |||
only: | |||
@@ -1,6 +1,20 @@ | |||
Changelog | |||
========= | |||
v0.5.3 | |||
------ | |||
`Released March 30, 2019 <https://github.com/earwig/mwparserfromhell/tree/v0.5.3>`_ | |||
(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.5.2...v0.5.3>`__): | |||
- Fixed manual construction of Node objects, previously unsupported. | |||
(`#214 <https://github.com/earwig/mwparserfromhell/issues/214>`_) | |||
- Fixed :class:`.Wikicode` transformation methods (:meth:`.Wikicode.replace`, | |||
:meth:`.Wikicode.remove`, etc.) when passed an empty section as an argument. | |||
(`#212 <https://github.com/earwig/mwparserfromhell/issues/212>`_) | |||
- Fixed the parser getting stuck inside malformed tables. | |||
(`#206 <https://github.com/earwig/mwparserfromhell/issues/206>`_) | |||
v0.5.2 | |||
------ | |||
@@ -8,17 +22,17 @@ v0.5.2 | |||
(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.5.1...v0.5.2>`__): | |||
- Dropped support for end-of-life Python versions 2.6, 3.2, 3.3. | |||
(`#199 <https://github.com/earwig/mwparserfromhell/issues/199>`, | |||
`#204 <https://github.com/earwig/mwparserfromhell/pull/204>`) | |||
(`#199 <https://github.com/earwig/mwparserfromhell/issues/199>`_, | |||
`#204 <https://github.com/earwig/mwparserfromhell/pull/204>`_) | |||
- Fixed signals getting stuck inside the C tokenizer until parsing finishes, | |||
in pathological cases. | |||
(`#206 <https://github.com/earwig/mwparserfromhell/issues/206>`) | |||
(`#206 <https://github.com/earwig/mwparserfromhell/issues/206>`_) | |||
- Fixed ``<wbr>`` not being considered a single-only tag. | |||
(`#200 <https://github.com/earwig/mwparserfromhell/pull/200>`) | |||
(`#200 <https://github.com/earwig/mwparserfromhell/pull/200>`_) | |||
- Fixed a C tokenizer crash on Python 3.7 when compiled with assertions. | |||
(`#208 <https://github.com/earwig/mwparserfromhell/issues/208>`) | |||
(`#208 <https://github.com/earwig/mwparserfromhell/issues/208>`_) | |||
- Cleaned up some minor documentation issues. | |||
(`#207 <https://github.com/earwig/mwparserfromhell/pull/207>`) | |||
(`#207 <https://github.com/earwig/mwparserfromhell/pull/207>`_) | |||
v0.5.1 | |||
------ | |||
@@ -29,7 +29,7 @@ outrageously powerful parser for `MediaWiki <http://mediawiki.org>`_ wikicode. | |||
__author__ = "Ben Kurtovic" | |||
__copyright__ = "Copyright (C) 2012-2018 Ben Kurtovic" | |||
__license__ = "MIT License" | |||
__version__ = "0.5.2" | |||
__version__ = "0.5.3" | |||
__email__ = "ben.kurtovic@gmail.com" | |||
from . import (compat, definitions, nodes, parser, smart_list, string_mixin, | |||
@@ -56,6 +56,7 @@ class Node(StringMixIn): | |||
def __children__(self): | |||
return | |||
# pylint: disable=unreachable | |||
yield # pragma: no cover (this is a generator that yields nothing) | |||
def __strip__(self, **kwargs): | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -33,8 +33,8 @@ class Argument(Node): | |||
def __init__(self, name, default=None): | |||
super(Argument, self).__init__() | |||
self._name = name | |||
self._default = default | |||
self.name = name | |||
self.default = default | |||
def __unicode__(self): | |||
start = "{{{" + str(self.name) | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -32,10 +32,10 @@ class Comment(Node): | |||
def __init__(self, contents): | |||
super(Comment, self).__init__() | |||
self._contents = contents | |||
self.contents = contents | |||
def __unicode__(self): | |||
return "<!--" + str(self.contents) + "-->" | |||
return "<!--" + self.contents + "-->" | |||
@property | |||
def contents(self): | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -33,9 +33,9 @@ class ExternalLink(Node): | |||
def __init__(self, url, title=None, brackets=True): | |||
super(ExternalLink, self).__init__() | |||
self._url = url | |||
self._title = title | |||
self._brackets = brackets | |||
self.url = url | |||
self.title = title | |||
self.brackets = brackets | |||
def __unicode__(self): | |||
if self.brackets: | |||
@@ -21,8 +21,8 @@ | |||
# SOFTWARE. | |||
""" | |||
This package contains objects used by :class:`.Node`\ s, but that are not nodes | |||
themselves. This includes template parameters and HTML tag attributes. | |||
This package contains objects used by :class:`.Node`\\ s, but that are not | |||
nodes themselves. This includes template parameters and HTML tag attributes. | |||
""" | |||
from .attribute import Attribute | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -37,16 +37,15 @@ class Attribute(StringMixIn): | |||
""" | |||
def __init__(self, name, value=None, quotes='"', pad_first=" ", | |||
pad_before_eq="", pad_after_eq="", check_quotes=True): | |||
pad_before_eq="", pad_after_eq=""): | |||
super(Attribute, self).__init__() | |||
if check_quotes and not quotes and self._value_needs_quotes(value): | |||
raise ValueError("given value {!r} requires quotes".format(value)) | |||
self._name = name | |||
self._value = value | |||
self._quotes = quotes | |||
self._pad_first = pad_first | |||
self._pad_before_eq = pad_before_eq | |||
self._pad_after_eq = pad_after_eq | |||
self.name = name | |||
self._quotes = None | |||
self.value = value | |||
self.quotes = quotes | |||
self.pad_first = pad_first | |||
self.pad_before_eq = pad_before_eq | |||
self.pad_after_eq = pad_after_eq | |||
def __unicode__(self): | |||
result = self.pad_first + str(self.name) + self.pad_before_eq | |||
@@ -59,10 +58,17 @@ class Attribute(StringMixIn): | |||
@staticmethod | |||
def _value_needs_quotes(val): | |||
"""Return the preferred quotes for the given value, or None.""" | |||
if val and any(char.isspace() for char in val): | |||
return ('"' in val and "'" in val) or ("'" if '"' in val else '"') | |||
return None | |||
"""Return valid quotes for the given value, or None if unneeded.""" | |||
if not val: | |||
return None | |||
val = "".join(str(node) for node in val.filter_text(recursive=False)) | |||
if not any(char.isspace() for char in val): | |||
return None | |||
if "'" in val and '"' not in val: | |||
return '"' | |||
if '"' in val and "'" not in val: | |||
return "'" | |||
return "\"'" # Either acceptable, " preferred over ' | |||
def _set_padding(self, attr, value): | |||
"""Setter for the value of a padding attribute.""" | |||
@@ -123,8 +129,8 @@ class Attribute(StringMixIn): | |||
else: | |||
code = parse_anything(newval) | |||
quotes = self._value_needs_quotes(code) | |||
if quotes in ['"', "'"] or (quotes is True and not self.quotes): | |||
self._quotes = quotes | |||
if quotes and (not self.quotes or self.quotes not in quotes): | |||
self._quotes = quotes[0] | |||
self._value = code | |||
@quotes.setter | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -40,11 +40,9 @@ class Parameter(StringMixIn): | |||
def __init__(self, name, value, showkey=True): | |||
super(Parameter, self).__init__() | |||
if not showkey and not self.can_hide_key(name): | |||
raise ValueError("key {!r} cannot be hidden".format(name)) | |||
self._name = name | |||
self._value = value | |||
self._showkey = showkey | |||
self.name = name | |||
self.value = value | |||
self.showkey = showkey | |||
def __unicode__(self): | |||
if self.showkey: | |||
@@ -83,5 +81,6 @@ class Parameter(StringMixIn): | |||
def showkey(self, newval): | |||
newval = bool(newval) | |||
if not newval and not self.can_hide_key(self.name): | |||
raise ValueError("parameter key cannot be hidden") | |||
raise ValueError("parameter key {!r} cannot be hidden".format( | |||
self.name)) | |||
self._showkey = newval |
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -33,8 +33,8 @@ class Heading(Node): | |||
def __init__(self, title, level): | |||
super(Heading, self).__init__() | |||
self._title = title | |||
self._level = level | |||
self.title = title | |||
self.level = level | |||
def __unicode__(self): | |||
return ("=" * self.level) + str(self.title) + ("=" * self.level) | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -38,28 +38,20 @@ class Tag(Node): | |||
closing_tag=None, wiki_style_separator=None, | |||
closing_wiki_markup=None): | |||
super(Tag, self).__init__() | |||
self._tag = tag | |||
if contents is None and not self_closing: | |||
self._contents = parse_anything("") | |||
else: | |||
self._contents = contents | |||
self.tag = tag | |||
self.contents = contents | |||
self._attrs = attrs if attrs else [] | |||
self._wiki_markup = wiki_markup | |||
self._self_closing = self_closing | |||
self._invalid = invalid | |||
self._implicit = implicit | |||
self._padding = padding | |||
if closing_tag: | |||
self._closing_tag = closing_tag | |||
else: | |||
self._closing_tag = tag | |||
self._wiki_style_separator = wiki_style_separator | |||
self._closing_wiki_markup = None | |||
self.wiki_markup = wiki_markup | |||
self.self_closing = self_closing | |||
self.invalid = invalid | |||
self.implicit = implicit | |||
self.padding = padding | |||
if closing_tag is not None: | |||
self.closing_tag = closing_tag | |||
self.wiki_style_separator = wiki_style_separator | |||
if closing_wiki_markup is not None: | |||
self._closing_wiki_markup = closing_wiki_markup | |||
elif wiki_markup and not self_closing: | |||
self._closing_wiki_markup = wiki_markup | |||
else: | |||
self._closing_wiki_markup = None | |||
self.closing_wiki_markup = closing_wiki_markup | |||
def __unicode__(self): | |||
if self.wiki_markup: | |||
@@ -69,10 +61,10 @@ class Tag(Node): | |||
attrs = "" | |||
padding = self.padding or "" | |||
separator = self.wiki_style_separator or "" | |||
close = self.closing_wiki_markup or "" | |||
if self.self_closing: | |||
return self.wiki_markup + attrs + padding + separator | |||
else: | |||
close = self.closing_wiki_markup or "" | |||
return self.wiki_markup + attrs + padding + separator + \ | |||
str(self.contents) + close | |||
@@ -93,10 +85,10 @@ class Tag(Node): | |||
yield attr.name | |||
if attr.value is not None: | |||
yield attr.value | |||
if self.contents: | |||
if not self.self_closing: | |||
yield self.contents | |||
if not self.self_closing and not self.wiki_markup and self.closing_tag: | |||
yield self.closing_tag | |||
if not self.wiki_markup and self.closing_tag: | |||
yield self.closing_tag | |||
def __strip__(self, **kwargs): | |||
if self.contents and is_visible(self.tag): | |||
@@ -308,7 +300,10 @@ class Tag(Node): | |||
return attr | |||
def remove(self, name): | |||
"""Remove all attributes with the given *name*.""" | |||
"""Remove all attributes with the given *name*. | |||
Raises :exc:`ValueError` if none were found. | |||
""" | |||
attrs = [attr for attr in self.attributes if attr.name == name.strip()] | |||
if not attrs: | |||
raise ValueError(name) | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2017 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -38,7 +38,7 @@ class Template(Node): | |||
def __init__(self, name, params=None): | |||
super(Template, self).__init__() | |||
self._name = name | |||
self.name = name | |||
if params: | |||
self._params = params | |||
else: | |||
@@ -108,7 +108,7 @@ class Template(Node): | |||
def _blank_param_value(value): | |||
"""Remove the content from *value* while keeping its whitespace. | |||
Replace *value*\ 's nodes with two text nodes, the first containing | |||
Replace *value*\\ 's nodes with two text nodes, the first containing | |||
whitespace from before its content and the second containing whitespace | |||
from after its content. | |||
""" | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -32,7 +32,7 @@ class Text(Node): | |||
def __init__(self, value): | |||
super(Text, self).__init__() | |||
self._value = value | |||
self.value = value | |||
def __unicode__(self): | |||
return self.value | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -33,8 +33,8 @@ class Wikilink(Node): | |||
def __init__(self, title, text=None): | |||
super(Wikilink, self).__init__() | |||
self._title = title | |||
self._text = text | |||
self.title = title | |||
self.text = text | |||
def __unicode__(self): | |||
if self.text is not None: | |||
@@ -55,8 +55,8 @@ class Parser(object): | |||
Actual parsing is a two-step process: first, the text is split up into a | |||
series of tokens by the :class:`.Tokenizer`, and then the tokens are | |||
converted into trees of :class:`.Wikicode` objects and :class:`.Node`\ s by | |||
the :class:`.Builder`. | |||
converted into trees of :class:`.Wikicode` objects and :class:`.Node`\\ s | |||
by the :class:`.Builder`. | |||
Instances of this class or its dependents (:class:`.Tokenizer` and | |||
:class:`.Builder`) should not be shared between threads. :meth:`parse` can | |||
@@ -79,7 +79,7 @@ class Parser(object): | |||
If given, *context* will be passed as a starting context to the parser. | |||
This is helpful when this function is used inside node attribute | |||
setters. For example, :class:`.ExternalLink`\ 's | |||
setters. For example, :class:`.ExternalLink`\\ 's | |||
:attr:`~.ExternalLink.url` setter sets *context* to | |||
:mod:`contexts.EXT_LINK_URI <.contexts>` to prevent the URL itself | |||
from becoming an :class:`.ExternalLink`. | |||
@@ -48,7 +48,7 @@ def _add_handler(token_type): | |||
class Builder(object): | |||
"""Builds a tree of nodes out of a sequence of tokens. | |||
To use, pass a list of :class:`.Token`\ s to the :meth:`build` method. The | |||
To use, pass a list of :class:`.Token`\\ s to the :meth:`build` method. The | |||
list will be exhausted as it is parsed and a :class:`.Wikicode` object | |||
containing the node tree will be returned. | |||
""" | |||
@@ -237,8 +237,7 @@ class Builder(object): | |||
else: | |||
name, value = self._pop(), None | |||
return Attribute(name, value, quotes, start.pad_first, | |||
start.pad_before_eq, start.pad_after_eq, | |||
check_quotes=False) | |||
start.pad_before_eq, start.pad_after_eq) | |||
else: | |||
self._write(self._handle_token(token)) | |||
raise ParserError("_handle_attribute() missed a close token") | |||
@@ -1,5 +1,5 @@ | |||
/* | |||
Copyright (C) 2012-2018 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||
this software and associated documentation files (the "Software"), to deal in | |||
@@ -2254,6 +2254,7 @@ static int Tokenizer_parse_table(Tokenizer* self) | |||
Py_DECREF(padding); | |||
Py_DECREF(style); | |||
while (!Tokenizer_IS_CURRENT_STACK(self, restore_point)) { | |||
Tokenizer_memoize_bad_route(self); | |||
trash = Tokenizer_pop(self); | |||
Py_XDECREF(trash); | |||
} | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2018 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -455,7 +455,7 @@ class Tokenizer(object): | |||
else: | |||
self._parse_free_uri_scheme() | |||
invalid = ("\n", " ", "[", "]") | |||
punct = tuple(",;\.:!?)") | |||
punct = tuple(",;\\.:!?)") | |||
if self._read() is self.END or self._read()[0] in invalid: | |||
self._fail_route() | |||
tail = "" | |||
@@ -1133,6 +1133,7 @@ class Tokenizer(object): | |||
table = self._parse(contexts.TABLE_OPEN) | |||
except BadRoute: | |||
while self._stack_ident != restore_point: | |||
self._memoize_bad_route() | |||
self._pop() | |||
self._head = reset | |||
self._emit_text("{") | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -70,5 +70,5 @@ def parse_anything(value, context=0, skip_style_tags=False): | |||
nodelist += parse_anything(item, context, skip_style_tags).nodes | |||
return Wikicode(nodelist) | |||
except TypeError: | |||
error = "Needs string, Node, Wikicode, int, None, or iterable of these, but got {0}: {1}" | |||
error = "Needs string, Node, Wikicode, file, int, None, or iterable of these, but got {0}: {1}" | |||
raise ValueError(error.format(type(value).__name__, value)) |
@@ -27,6 +27,7 @@ import re | |||
from .compat import bytes, py3k, range, str | |||
from .nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity, | |||
Node, Tag, Template, Text, Wikilink) | |||
from .smart_list import _ListProxy | |||
from .string_mixin import StringMixIn | |||
from .utils import parse_anything | |||
@@ -55,7 +56,7 @@ class Wikicode(StringMixIn): | |||
@staticmethod | |||
def _get_children(node, contexts=False, restrict=None, parent=None): | |||
"""Iterate over all child :class:`.Node`\ s of a given *node*.""" | |||
"""Iterate over all child :class:`.Node`\\ s of a given *node*.""" | |||
yield (parent, node) if contexts else node | |||
if restrict and isinstance(node, restrict): | |||
return | |||
@@ -108,6 +109,26 @@ class Wikicode(StringMixIn): | |||
if (not forcetype or isinstance(node, forcetype)) and match(node): | |||
yield (i, node) | |||
def _is_child_wikicode(self, obj, recursive=True):
    """Tell whether *obj*'s node list belongs to this code or a descendant.

    The comparison is by identity, not equality: *obj* matches when its
    ``nodes`` object (after unwrapping, see below) is the very same object
    as the ``nodes`` of ``self`` or, if *recursive* is true, of any object
    reachable by repeatedly following ``__children__()`` of contained nodes.

    Returns ``True`` on a match and ``False`` otherwise.
    """
    def backing_list(nodes):
        # A _ListProxy is swapped for its ``_parent`` so that two proxies
        # are compared through that shared object rather than themselves.
        # NOTE(review): presumably ``_parent`` is the list the proxy is a
        # view of -- confirm against smart_list.
        if not isinstance(nodes, _ListProxy):
            return nodes
        return nodes._parent  # pylint: disable=protected-access

    wanted = backing_list(obj.nodes)
    if wanted is backing_list(self.nodes):
        return True
    if not recursive:
        return False

    # Depth-first walk using an explicit stack; everything yielded by a
    # node's __children__() is pushed and later checked the same way.
    pending = [self]
    while pending:
        current = pending.pop()
        if wanted is backing_list(current.nodes):
            return True
        for node in current.nodes:
            pending.extend(list(node.__children__()))
    return False
def _do_strong_search(self, obj, recursive=True): | |||
"""Search for the specific element *obj* within the node list. | |||
@@ -120,11 +141,16 @@ class Wikicode(StringMixIn): | |||
:class:`.Wikicode` contained by a node within ``self``. If *obj* is not | |||
found, :exc:`ValueError` is raised. | |||
""" | |||
if isinstance(obj, Wikicode): | |||
if not self._is_child_wikicode(obj, recursive): | |||
raise ValueError(obj) | |||
return obj, slice(0, len(obj.nodes)) | |||
if isinstance(obj, Node): | |||
mkslice = lambda i: slice(i, i + 1) | |||
if not recursive: | |||
return self, mkslice(self.index(obj)) | |||
for i, node in enumerate(self.nodes): | |||
for node in self.nodes: | |||
for context, child in self._get_children(node, contexts=True): | |||
if obj is child: | |||
if not context: | |||
@@ -132,11 +158,7 @@ class Wikicode(StringMixIn): | |||
return context, mkslice(context.index(child)) | |||
raise ValueError(obj) | |||
context, ind = self._do_strong_search(obj.get(0), recursive) | |||
for i in range(1, len(obj.nodes)): | |||
if obj.get(i) is not context.get(ind.start + i): | |||
raise ValueError(obj) | |||
return context, slice(ind.start, ind.start + len(obj.nodes)) | |||
raise TypeError(obj) | |||
def _do_weak_search(self, obj, recursive): | |||
"""Search for an element that looks like *obj* within the node list. | |||
@@ -254,7 +276,7 @@ class Wikicode(StringMixIn): | |||
self._nodes = value | |||
def get(self, index): | |||
"""Return the *index*\ th node within the list of nodes.""" | |||
"""Return the *index*\\ th node within the list of nodes.""" | |||
return self.nodes[index] | |||
def set(self, index, value): | |||
@@ -1,7 +1,5 @@ | |||
#! /usr/bin/env bash | |||
set -euo pipefail | |||
if [[ -z "$1" ]]; then | |||
echo "usage: $0 1.2.3" | |||
exit 1 | |||
@@ -77,7 +75,8 @@ do_git_stuff() { | |||
upload_to_pypi() { | |||
echo -n "PyPI: uploading source tarball..." | |||
python setup.py -q register sdist upload -s | |||
python setup.py -q sdist | |||
twine upload -s dist/mwparserfromhell-$VERSION* | |||
echo " done." | |||
} | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -67,7 +67,7 @@ class TreeEqualityTestCase(TestCase): | |||
def assertCommentNodeEqual(self, expected, actual): | |||
"""Assert that two Comment nodes have the same data.""" | |||
self.assertWikicodeEqual(expected.contents, actual.contents) | |||
self.assertEqual(expected.contents, actual.contents) | |||
def assertHeadingNodeEqual(self, expected, actual): | |||
"""Assert that two Heading nodes have the same data.""" | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -232,11 +232,11 @@ class TestBuilder(TreeEqualityTestCase): | |||
tests = [ | |||
([tokens.CommentStart(), tokens.Text(text="foobar"), | |||
tokens.CommentEnd()], | |||
wrap([Comment(wraptext("foobar"))])), | |||
wrap([Comment("foobar")])), | |||
([tokens.CommentStart(), tokens.Text(text="spam"), | |||
tokens.Text(text="eggs"), tokens.CommentEnd()], | |||
wrap([Comment(wraptext("spam", "eggs"))])), | |||
wrap([Comment("spameggs")])), | |||
] | |||
for test, valid in tests: | |||
self.assertWikicodeEqual(valid, self.builder.build(test)) | |||
@@ -412,7 +412,7 @@ class TestBuilder(TreeEqualityTestCase): | |||
wraptext("c"), params=[Parameter(wraptext("1"), wrap([Wikilink( | |||
wraptext("d")), Argument(wraptext("e"))]), showkey=False)])]), | |||
showkey=False)]), Wikilink(wraptext("f"), wrap([Argument(wraptext( | |||
"g")), Comment(wraptext("h"))])), Template(wraptext("i"), params=[ | |||
"g")), Comment("h")])), Template(wraptext("i"), params=[ | |||
Parameter(wraptext("j"), wrap([HTMLEntity("nbsp", | |||
named=True)]))])]) | |||
self.assertWikicodeEqual(valid, self.builder.build(test)) | |||
@@ -114,14 +114,16 @@ class TestDocs(unittest.TestCase): | |||
url1 = "https://en.wikipedia.org/w/api.php" | |||
url2 = "https://en.wikipedia.org/w/index.php?title={0}&action=raw" | |||
title = "Test" | |||
data = {"action": "query", "prop": "revisions", "rvlimit": 1, | |||
"rvprop": "content", "format": "json", "titles": title} | |||
data = {"action": "query", "prop": "revisions", "rvprop": "content", | |||
"rvslots": "main", "rvlimit": 1, "titles": title, | |||
"format": "json", "formatversion": "2"} | |||
try: | |||
raw = urlopen(url1, urlencode(data).encode("utf8")).read() | |||
except IOError: | |||
self.skipTest("cannot continue because of unsuccessful web call") | |||
res = json.loads(raw.decode("utf8")) | |||
text = list(res["query"]["pages"].values())[0]["revisions"][0]["*"] | |||
revision = res["query"]["pages"][0]["revisions"][0] | |||
text = revision["slots"]["main"]["content"] | |||
try: | |||
expected = urlopen(url2.format(title)).read().decode("utf8") | |||
except IOError: | |||
@@ -188,8 +188,8 @@ class TestWikicode(TreeEqualityTestCase): | |||
self.assertRaises(ValueError, func, fake, "q", recursive=True) | |||
func("{{b}}{{c}}", "w", recursive=False) | |||
func("{{d}}{{e}}", "x", recursive=True) | |||
func(wrap(code4.nodes[-2:]), "y", recursive=False) | |||
func(wrap(code4.nodes[-2:]), "z", recursive=True) | |||
func(Wikicode(code4.nodes[-2:]), "y", recursive=False) | |||
func(Wikicode(code4.nodes[-2:]), "z", recursive=True) | |||
self.assertEqual(expected[3], code4) | |||
self.assertRaises(ValueError, func, "{{c}}{{d}}", "q", recursive=False) | |||
self.assertRaises(ValueError, func, "{{c}}{{d}}", "q", recursive=True) | |||
@@ -218,6 +218,20 @@ class TestWikicode(TreeEqualityTestCase): | |||
func("{{foo}}{{baz}}", "{{lol}}") | |||
self.assertEqual(expected[6], code7) | |||
code8 = parse("== header ==") | |||
func = partial(meth, code8) | |||
sec1, sec2 = code8.get_sections(include_headings=False) | |||
func(sec1, "lead\n") | |||
func(sec2, "\nbody") | |||
self.assertEqual(expected[7], code8) | |||
code9 = parse("{{foo}}") | |||
meth(code9.get_sections()[0], code9.get_sections()[0], "{{bar}}") | |||
meth(code9.get_sections()[0], code9, "{{baz}}") | |||
meth(code9, code9, "{{qux}}") | |||
meth(code9, code9.get_sections()[0], "{{quz}}") | |||
self.assertEqual(expected[8], code9) | |||
def test_insert_before(self): | |||
"""test Wikicode.insert_before()""" | |||
meth = lambda code, *args, **kw: code.insert_before(*args, **kw) | |||
@@ -228,7 +242,10 @@ class TestWikicode(TreeEqualityTestCase): | |||
"{{a}}w{{b}}{{c}}x{{d}}{{e}}{{f}}{{g}}{{h}}yz{{i}}{{j}}", | |||
"{{a|x{{b}}{{c}}|{{f|{{g}}=y{{h}}{{i}}}}}}", | |||
"here cdis {{some abtext and a {{template}}}}", | |||
"{{foo}}{{bar}}{{baz}}{{lol}}{{foo}}{{baz}}"] | |||
"{{foo}}{{bar}}{{baz}}{{lol}}{{foo}}{{baz}}", | |||
"lead\n== header ==\nbody", | |||
"{{quz}}{{qux}}{{baz}}{{bar}}{{foo}}", | |||
] | |||
self._test_search(meth, expected) | |||
def test_insert_after(self): | |||
@@ -241,16 +258,26 @@ class TestWikicode(TreeEqualityTestCase): | |||
"{{a}}{{b}}{{c}}w{{d}}{{e}}x{{f}}{{g}}{{h}}{{i}}{{j}}yz", | |||
"{{a|{{b}}{{c}}x|{{f|{{g}}={{h}}{{i}}y}}}}", | |||
"here is {{somecd text andab a {{template}}}}", | |||
"{{foo}}{{bar}}{{baz}}{{foo}}{{baz}}{{lol}}"] | |||
"{{foo}}{{bar}}{{baz}}{{foo}}{{baz}}{{lol}}", | |||
"lead\n== header ==\nbody", | |||
"{{foo}}{{bar}}{{baz}}{{qux}}{{quz}}", | |||
] | |||
self._test_search(meth, expected) | |||
def test_replace(self): | |||
"""test Wikicode.replace()""" | |||
meth = lambda code, *args, **kw: code.replace(*args, **kw) | |||
expected = [ | |||
"{{a}}xz[[y]]{{e}}", "dcdffe", "{{a|x|{{c|d=y}}}}", | |||
"{{a}}wx{{f}}{{g}}z", "{{a|x|{{f|{{g}}=y}}}}", | |||
"here cd ab a {{template}}}}", "{{foo}}{{bar}}{{baz}}{{lol}}"] | |||
"{{a}}xz[[y]]{{e}}", | |||
"dcdffe", | |||
"{{a|x|{{c|d=y}}}}", | |||
"{{a}}wx{{f}}{{g}}z", | |||
"{{a|x|{{f|{{g}}=y}}}}", | |||
"here cd ab a {{template}}}}", | |||
"{{foo}}{{bar}}{{baz}}{{lol}}", | |||
"lead\n== header ==\nbody", | |||
"{{quz}}", | |||
] | |||
self._test_search(meth, expected) | |||
def test_append(self): | |||
@@ -269,9 +296,16 @@ class TestWikicode(TreeEqualityTestCase): | |||
"""test Wikicode.remove()""" | |||
meth = lambda code, obj, value, **kw: code.remove(obj, **kw) | |||
expected = [ | |||
"{{a}}{{c}}", "", "{{a||{{c|d=}}}}", "{{a}}{{f}}", | |||
"{{a||{{f|{{g}}=}}}}", "here a {{template}}}}", | |||
"{{foo}}{{bar}}{{baz}}"] | |||
"{{a}}{{c}}", | |||
"", | |||
"{{a||{{c|d=}}}}", | |||
"{{a}}{{f}}", | |||
"{{a||{{f|{{g}}=}}}}", | |||
"here a {{template}}}}", | |||
"{{foo}}{{bar}}{{baz}}", | |||
"== header ==", | |||
"", | |||
] | |||
self._test_search(meth, expected) | |||
def test_matches(self): | |||
@@ -408,3 +408,17 @@ name: junk_after_table_row | |||
label: ignore junk on the first line of a table row | |||
input: "{|\n|- foo="bar" | baz\n|blerp\n|}" | |||
output: [TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|-"), Text(text="tr"), TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), Text(text="foo"), TagAttrEquals(), TagAttrQuote(char='"'), Text(text="bar"), TagAttrStart(pad_first=" ", pad_before_eq=" ", pad_after_eq=""), Text(text="|"), TagAttrStart(pad_first="", pad_before_eq="", pad_after_eq=""), Text(text="baz"), TagCloseOpen(padding="\n"), TagOpenOpen(wiki_markup="|"), Text(text="td"), TagCloseOpen(padding=""), Text(text="blerp\n"), TagOpenClose(wiki_markup=""), Text(text="td"), TagCloseClose(), TagOpenClose(wiki_markup=""), Text(text="tr"), TagCloseClose(), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose()] | |||
--- | |||
name: incomplete_nested_open_only | |||
label: many nested incomplete tables: table open only | |||
input: "{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|" | |||
output: [Text(text="{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|\n{|")] | |||
--- | |||
name: incomplete_nested_open_and_row | |||
label: many nested incomplete tables: table open and row separator (see issue #206) | |||
input: "{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-" | |||
output: [Text(text="{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-\n{|\n|-")] |