@@ -0,0 +1,13 @@
+BasedOnStyle: LLVM
+AlignConsecutiveMacros: AcrossEmptyLines
+AllowShortFunctionsOnASingleLine: Inline
+AlwaysBreakAfterReturnType: TopLevelDefinitions
+BinPackArguments: false
+BinPackParameters: false
+BreakBeforeBraces: Linux
+ColumnLimit: 88
+IndentPPDirectives: AfterHash
+IndentWidth: 4
+SpaceAfterCStyleCast: true
+StatementMacros:
+  - PyObject_HEAD
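This new .clang-format file pins the C style used by the ctokenizer hunks at the end of this diff: LLVM as the base, 4-space indents, an 88-column limit matching black's, a space after the hash in nested preprocessor directives, and a space after C-style casts. A minimal sketch of applying it, assuming a clang-format binary is installed and the command is run from the repository root (the glob mirrors the paths used later in the setup.py hunks):

```python
import subprocess
from glob import glob

# --style=file makes clang-format read the .clang-format above;
# -i rewrites the sources in place.
sources = sorted(glob("src/mwparserfromhell/parser/ctokenizer/*.c"))
headers = sorted(glob("src/mwparserfromhell/parser/ctokenizer/*.h"))
subprocess.run(["clang-format", "-i", "--style=file", *sources, *headers], check=True)
```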
@@ -13,5 +13,6 @@ dist
 docs/_build
 scripts/*.log
 htmlcov/
+compile_commands.json
 .idea/
 .pytest_cache/
@@ -41,6 +41,7 @@ from mwparserfromhell.parser._tokenizer import CTokenizer

 LOOPS = 10000

+
 class Color:
     GRAY = "\x1b[30;1m"
     GREEN = "\x1b[92m"
@@ -63,11 +64,11 @@ class MemoryTest:
             data = {"name": None, "label": None, "input": None, "output": None}
             for line in test.strip().splitlines():
                 if line.startswith("name:"):
-                    data["name"] = line[len("name:"):].strip()
+                    data["name"] = line[len("name:") :].strip()
                 elif line.startswith("label:"):
-                    data["label"] = line[len("label:"):].strip()
+                    data["label"] = line[len("label:") :].strip()
                 elif line.startswith("input:"):
-                    raw = line[len("input:"):].strip()
+                    raw = line[len("input:") :].strip()
                     if raw[0] == '"' and raw[-1] == '"':
                         raw = raw[1:-1]
                     raw = raw.encode("raw_unicode_escape")
@@ -81,7 +82,7 @@ class MemoryTest:
         def load_file(filename):
             with open(filename, "rU") as fp:
                 text = fp.read()
-                name = path.split(filename)[1][:0-len(extension)]
+                name = path.split(filename)[1][: 0 - len(extension)]
                 self._parse_file(name, text)

         root = path.split(path.dirname(path.abspath(__file__)))[0]
@@ -119,8 +120,11 @@ class MemoryTest:
         tmpl = "{0}[{1:03}/{2}]{3} {4}: "
         for i, (name, text) in enumerate(self._tests, 1):
-            sys.stdout.write(tmpl.format(Color.GRAY, i, len(self._tests),
-                                         Color.RESET, name.ljust(width)))
+            sys.stdout.write(
+                tmpl.format(
+                    Color.GRAY, i, len(self._tests), Color.RESET, name.ljust(width)
+                )
+            )
             sys.stdout.flush()
             parent, child = Pipe()
             p = Process(target=_runner, args=(text, child))
@@ -156,6 +160,7 @@ def _runner(text, child):
     child.send("OK")
     child.recv()

+
 if __name__ == "__main__":
     setlocale(LC_ALL, "")
     MemoryTest().run()
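For context on the hunks above: the memory-test script runs each parse in a child process and talks to it over a pipe, so one leaky or crashing test cannot contaminate the measurements of the next. A minimal sketch of that parent/child handshake, with illustrative payloads rather than the script's real test data:

```python
from multiprocessing import Pipe, Process


def _runner(text, child):
    # Child process: do the work, report a verdict, then wait to be released.
    child.send("OK" if text else "FAIL")
    child.recv()


if __name__ == "__main__":
    parent, child = Pipe()
    p = Process(target=_runner, args=("{{some wikicode}}", child))
    p.start()
    print(parent.recv())  # -> "OK"
    parent.send("done")   # Unblock the child so it can exit cleanly.
    p.join()
```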
@@ -52,8 +52,10 @@ elif env_var is not None:
 # Remove the command line argument as it isn't understood by setuptools:
-sys.argv = [arg for arg in sys.argv
-            if arg not in ("--without-extension", "--with-extension")]
+sys.argv = [
+    arg for arg in sys.argv if arg not in ("--without-extension", "--with-extension")
+]

+
 def build_ext_patched(self):
     try:
@@ -63,33 +65,40 @@ def build_ext_patched(self):
         print("Falling back to pure Python mode.")
         del self.extensions[:]

+
 if fallback:
     build_ext.run, build_ext_original = build_ext_patched, build_ext.run

 # Project-specific part begins here:
-tokenizer = Extension("mwparserfromhell.parser._tokenizer",
-                      sources=sorted(glob("src/mwparserfromhell/parser/ctokenizer/*.c")),
-                      depends=sorted(glob("src/mwparserfromhell/parser/ctokenizer/*.h")))
+tokenizer = Extension(
+    "mwparserfromhell.parser._tokenizer",
+    sources=sorted(glob("src/mwparserfromhell/parser/ctokenizer/*.c")),
+    depends=sorted(glob("src/mwparserfromhell/parser/ctokenizer/*.h")),
+)

 setup(
-    name = "mwparserfromhell",
-    packages = find_packages("src"),
-    package_dir = {"": "src"},
-    ext_modules = [tokenizer] if use_extension else [],
-    setup_requires = ["pytest-runner"] if "test" in sys.argv or "pytest" in sys.argv else [],
-    tests_require = ["pytest"],
-    version = __version__,
-    python_requires = ">= 3.5",
-    author = "Ben Kurtovic",
-    author_email = "ben.kurtovic@gmail.com",
-    url = "https://github.com/earwig/mwparserfromhell",
-    description = "MWParserFromHell is a parser for MediaWiki wikicode.",
-    long_description = long_docs,
-    download_url = "https://github.com/earwig/mwparserfromhell/tarball/v{}".format(__version__),
-    keywords = "earwig mwparserfromhell wikipedia wiki mediawiki wikicode template parsing",
-    license = "MIT License",
-    classifiers = [
+    name="mwparserfromhell",
+    packages=find_packages("src"),
+    package_dir={"": "src"},
+    ext_modules=[tokenizer] if use_extension else [],
+    setup_requires=["pytest-runner"]
+    if "test" in sys.argv or "pytest" in sys.argv
+    else [],
+    tests_require=["pytest"],
+    version=__version__,
+    python_requires=">= 3.5",
+    author="Ben Kurtovic",
+    author_email="ben.kurtovic@gmail.com",
+    url="https://github.com/earwig/mwparserfromhell",
+    description="MWParserFromHell is a parser for MediaWiki wikicode.",
+    long_description=long_docs,
+    download_url="https://github.com/earwig/mwparserfromhell/tarball/v{}".format(
+        __version__
+    ),
+    keywords="earwig mwparserfromhell wikipedia wiki mediawiki wikicode template parsing",
+    license="MIT License",
+    classifiers=[
         "Development Status :: 4 - Beta",
         "Environment :: Console",
         "Intended Audience :: Developers",
@@ -101,6 +110,6 @@ setup(
         "Programming Language :: Python :: 3.7",
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
-        "Topic :: Text Processing :: Markup"
+        "Topic :: Text Processing :: Markup",
     ],
 )
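The hunks above preserve setup.py's optional-extension trick intact: build_ext.run is swapped for a wrapper that catches a failed C build and empties self.extensions, so installation falls back to pure Python instead of aborting. A condensed sketch of the pattern (broad except for brevity; the real script distinguishes specific build errors):

```python
from setuptools.command.build_ext import build_ext


def build_ext_patched(self):
    try:
        build_ext_original(self)  # Try the normal C build first.
    except Exception:
        print("Falling back to pure Python mode.")
        del self.extensions[:]  # With no extensions left, nothing is compiled.


# Swap in the wrapper, keeping the original so the wrapper can call it.
build_ext.run, build_ext_original = build_ext_patched, build_ext.run
```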
@@ -30,7 +30,6 @@ __license__ = "MIT License"
 __version__ = "0.7.dev0"
 __email__ = "ben.kurtovic@gmail.com"

-from . import (definitions, nodes, parser, smart_list, string_mixin,
-               utils, wikicode)
+from . import definitions, nodes, parser, smart_list, string_mixin, utils, wikicode

 parse = utils.parse_anything
@@ -26,8 +26,14 @@ When updating this file, please also update the the C tokenizer version:
 - mwparserfromhell/parser/ctokenizer/definitions.h
 """

-__all__ = ["get_html_tag", "is_parsable", "is_visible", "is_single",
-           "is_single_only", "is_scheme"]
+__all__ = [
+    "get_html_tag",
+    "is_parsable",
+    "is_visible",
+    "is_single",
+    "is_single_only",
+    "is_scheme",
+]

 URI_SCHEMES = {
     # [wikimedia/mediawiki.git]/includes/DefaultSettings.php @ 5c660de5d0
@@ -92,7 +98,7 @@ INVISIBLE_TAGS = [
     "score",
     "section",
     "templatedata",
-    "timeline"
+    "timeline",
 ]

 # [wikimedia/mediawiki.git]/includes/parser/Sanitizer.php @ 95e17ee645
@@ -103,29 +109,35 @@ MARKUP_TO_HTML = {
     "#": "li",
     "*": "li",
     ";": "dt",
-    ":": "dd"
+    ":": "dd",
 }

+
 def get_html_tag(markup):
     """Return the HTML tag associated with the given wiki-markup."""
     return MARKUP_TO_HTML[markup]

+
 def is_parsable(tag):
     """Return if the given *tag*'s contents should be passed to the parser."""
     return tag.lower() not in PARSER_BLACKLIST

+
 def is_visible(tag):
     """Return whether or not the given *tag* contains visible text."""
     return tag.lower() not in INVISIBLE_TAGS

+
 def is_single(tag):
     """Return whether or not the given *tag* can exist without a close tag."""
     return tag.lower() in SINGLE

+
 def is_single_only(tag):
     """Return whether or not the given *tag* must exist without a close tag."""
     return tag.lower() in SINGLE_ONLY

+
 def is_scheme(scheme, slashes=True):
     """Return whether *scheme* is valid for external links."""
     scheme = scheme.lower()
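These helpers are small lookups over the module's tables; roughly, usage looks like this (that "br" is in SINGLE_ONLY is an assumption from MediaWiki's tag rules, not shown in this hunk):

```python
from mwparserfromhell.definitions import get_html_tag, is_single_only

print(get_html_tag("#"))     # "li", straight from MARKUP_TO_HTML above
print(is_single_only("br"))  # True: <br> never takes a closing tag
```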
@@ -39,5 +39,15 @@ from .tag import Tag
 from .template import Template
 from .wikilink import Wikilink

-__all__ = ["Argument", "Comment", "ExternalLink", "HTMLEntity", "Heading",
-           "Node", "Tag", "Template", "Text", "Wikilink"]
+__all__ = [
+    "Argument",
+    "Comment",
+    "ExternalLink",
+    "HTMLEntity",
+    "Heading",
+    "Node",
+    "Tag",
+    "Template",
+    "Text",
+    "Wikilink",
+]
@@ -22,6 +22,7 @@ from ..string_mixin import StringMixIn

 __all__ = ["Node"]

+
 class Node(StringMixIn):
     """Represents the base Node type, demonstrating the methods to override.
@@ -35,6 +36,7 @@ class Node(StringMixIn):
     :meth:`__showtree__` can be overridden to build a nice tree representation
     of the node, if desired, for :meth:`~.Wikicode.get_tree`.
     """
+
     def __str__(self):
         raise NotImplementedError()
@@ -24,6 +24,7 @@ from ..utils import parse_anything

 __all__ = ["Argument"]

+
 class Argument(Node):
     """Represents a template argument substitution, like ``{{{foo}}}``."""
@@ -23,6 +23,7 @@ from ._base import Node

 __all__ = ["Comment"]

+
 class Comment(Node):
     """Represents a hidden HTML comment, like ``<!-- foobar -->``."""
@@ -24,6 +24,7 @@ from ..utils import parse_anything

 __all__ = ["ExternalLink"]

+
 class ExternalLink(Node):
     """Represents an external link, like ``[http://example.com/ Example]``."""
@@ -83,6 +84,7 @@ class ExternalLink(Node):
     def url(self, value):
         # pylint: disable=import-outside-toplevel
         from ..parser import contexts
+
         self._url = parse_anything(value, contexts.EXT_LINK_URI)

     @title.setter
@@ -24,6 +24,7 @@ from ...utils import parse_anything

 __all__ = ["Attribute"]

+
 class Attribute(StringMixIn):
     """Represents an attribute of an HTML tag.
@@ -32,8 +33,15 @@ class Attribute(StringMixIn):
     whose value is ``"foo"``.
     """

-    def __init__(self, name, value=None, quotes='"', pad_first=" ",
-                 pad_before_eq="", pad_after_eq=""):
+    def __init__(
+        self,
+        name,
+        value=None,
+        quotes='"',
+        pad_first=" ",
+        pad_before_eq="",
+        pad_after_eq="",
+    ):
         super().__init__()
         self.name = name
         self._quotes = None
@@ -25,6 +25,7 @@ from ...utils import parse_anything

 __all__ = ["Parameter"]

+
 class Parameter(StringMixIn):
     """Represents a paramater of a template.
@@ -77,6 +78,5 @@ class Parameter(StringMixIn):
     def showkey(self, newval):
         newval = bool(newval)
         if not newval and not self.can_hide_key(self.name):
-            raise ValueError("parameter key {!r} cannot be hidden".format(
-                self.name))
+            raise ValueError("parameter key {!r} cannot be hidden".format(self.name))
         self._showkey = newval
@@ -24,6 +24,7 @@ from ..utils import parse_anything

 __all__ = ["Heading"]

+
 class Heading(Node):
     """Represents a section heading in wikicode, like ``== Foo ==``."""
@@ -24,6 +24,7 @@ from ._base import Node

 __all__ = ["HTMLEntity"]

+
 class HTMLEntity(Node):
     """Represents an HTML entity, like ``&nbsp;``, either named or unnamed."""
@@ -101,19 +102,23 @@ class HTMLEntity(Node):
             except ValueError:
                 if newval not in htmlentities.entitydefs:
                     raise ValueError(
-                        "entity value {!r} is not a valid name".format(newval)) from None
+                        "entity value {!r} is not a valid name".format(newval)
+                    ) from None
                 self._named = True
                 self._hexadecimal = False
             else:
                 if intval < 0 or intval > 0x10FFFF:
                     raise ValueError(
-                        "entity value 0x{:x} is not in range(0x110000)".format(intval)) from None
+                        "entity value 0x{:x} is not in range(0x110000)".format(intval)
+                    ) from None
                 self._named = False
                 self._hexadecimal = True
         else:
             test = int(newval, 16 if self.hexadecimal else 10)
             if test < 0 or test > 0x10FFFF:
-                raise ValueError("entity value {} is not in range(0x110000)".format(test))
+                raise ValueError(
+                    "entity value {} is not in range(0x110000)".format(test)
+                )
             self._named = False

         self._value = newval
@@ -126,8 +131,10 @@ class HTMLEntity(Node):
         try:
             int(self.value, 16)
         except ValueError as exc:
-            raise ValueError("current entity value {!r} is not a valid "
-                             "Unicode codepoint".format(self.value)) from exc
+            raise ValueError(
+                "current entity value {!r} is not a valid "
+                "Unicode codepoint".format(self.value)
+            ) from exc
         self._named = newval

     @hexadecimal.setter
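The setter logic above keeps numeric entity values inside Unicode's range(0x110000). A rough usage sketch, with constructor arguments inferred from the calls in the builder.py hunks further down rather than stated in this diff:

```python
from mwparserfromhell.nodes import HTMLEntity

entity = HTMLEntity("6b", named=False, hexadecimal=True)
print(str(entity))         # "&#x6b;"
print(entity.normalize())  # "k" (U+006B)
entity.value = "110000"    # raises ValueError: 0x110000 is out of range
```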
@@ -26,13 +26,24 @@ from ..utils import parse_anything

 __all__ = ["Tag"]

+
 class Tag(Node):
     """Represents an HTML-style tag in wikicode, like ``<ref>``."""

-    def __init__(self, tag, contents=None, attrs=None, wiki_markup=None,
-                 self_closing=False, invalid=False, implicit=False, padding="",
-                 closing_tag=None, wiki_style_separator=None,
-                 closing_wiki_markup=None):
+    def __init__(
+        self,
+        tag,
+        contents=None,
+        attrs=None,
+        wiki_markup=None,
+        self_closing=False,
+        invalid=False,
+        implicit=False,
+        padding="",
+        closing_tag=None,
+        wiki_style_separator=None,
+        closing_wiki_markup=None,
+    ):
         super().__init__()
         self.tag = tag
         self.contents = contents
@@ -60,8 +71,14 @@ class Tag(Node):
             if self.self_closing:
                 return self.wiki_markup + attrs + padding + separator
             close = self.closing_wiki_markup or ""
-            return self.wiki_markup + attrs + padding + separator + \
-                   str(self.contents) + close
+            return (
+                self.wiki_markup
+                + attrs
+                + padding
+                + separator
+                + str(self.contents)
+                + close
+            )

         result = ("</" if self.invalid else "<") + str(self.tag)
         if self.attributes:
@@ -270,8 +287,15 @@ class Tag(Node):
                 return attr
         raise ValueError(name)

-    def add(self, name, value=None, quotes='"', pad_first=" ",
-            pad_before_eq="", pad_after_eq=""):
+    def add(
+        self,
+        name,
+        value=None,
+        quotes='"',
+        pad_first=" ",
+        pad_before_eq="",
+        pad_after_eq="",
+    ):
         """Add an attribute with the given *name* and *value*.

         *name* and *value* can be anything parsable by
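Tag.add builds an Attribute with the same quoting and padding knobs as the Attribute constructor shown earlier. A short usage sketch:

```python
import mwparserfromhell

code = mwparserfromhell.parse("<span>hello</span>")
tag = code.filter_tags()[0]
tag.add("class", "greeting")  # pad_first=" " supplies the separating space
print(code)  # <span class="greeting">hello</span>
```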
@@ -33,6 +33,7 @@ FLAGS = re.DOTALL | re.UNICODE
 # Used to allow None as a valid fallback value
 _UNSET = object()

+
 class Template(Node):
     """Represents a template in wikicode, like ``{{foo}}``."""
@@ -153,7 +154,7 @@ class Template(Node):
     def _fix_dependendent_params(self, i):
         """Unhide keys if necessary after removing the param at index *i*."""
         if not self.params[i].showkey:
-            for param in self.params[i + 1:]:
+            for param in self.params[i + 1 :]:
                 if not param.showkey:
                     param.showkey = True
@@ -175,9 +176,10 @@ class Template(Node):
         If one exists, we should remove the given one rather than blanking it.
         """
         if self.params[i].showkey:
-            following = self.params[i + 1:]
-            better_matches = [after.name.strip() == name and not after.showkey
-                              for after in following]
+            following = self.params[i + 1 :]
+            better_matches = [
+                after.name.strip() == name and not after.showkey for after in following
+            ]
             return any(better_matches)
         return False
@@ -235,8 +237,7 @@ class Template(Node):
     def __getitem__(self, name):
         return self.get(name)

-    def add(self, name, value, showkey=None, before=None,
-            preserve_spacing=True):
+    def add(self, name, value, showkey=None, before=None, preserve_spacing=True):
         """Add a parameter to the template with a given *name* and *value*.

         *name* and *value* can be anything parsable by
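Template.add's signature is unchanged by the reflow; per its docstring it replaces an existing parameter's value and otherwise appends. A short usage sketch:

```python
import mwparserfromhell

code = mwparserfromhell.parse("{{foo|bar|baz=qux}}")
template = code.filter_templates()[0]
template.add("baz", "new")  # "baz" already exists, so its value is replaced
print(code)  # {{foo|bar|baz=new}}
```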
@@ -23,6 +23,7 @@ from ._base import Node

 __all__ = ["Text"]

+
 class Text(Node):
     """Represents ordinary, unformatted text with no special properties."""
@@ -24,6 +24,7 @@ from ..utils import parse_anything

 __all__ = ["Wikilink"]

+
 class Wikilink(Node):
     """Represents an internal wikilink, like ``[[Foo|Bar]]``."""
@@ -26,16 +26,20 @@ together into one interface.
 from .builder import Builder
 from .errors import ParserError

 try:
     from ._tokenizer import CTokenizer
+
     use_c = True
 except ImportError:
     from .tokenizer import Tokenizer
+
     CTokenizer = None
     use_c = False

 __all__ = ["use_c", "Parser", "ParserError"]

+
 class Parser:
     """Represents a parser for wikicode.
@@ -57,6 +61,7 @@ class Parser:
             self._tokenizer = CTokenizer()
         else:
             from .tokenizer import Tokenizer
+
             self._tokenizer = Tokenizer()
         self._builder = Builder()
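The try/except above is the classic optional-C-extension import: if _tokenizer fails to import, the pure-Python Tokenizer is used instead, and use_c records which path was taken. Downstream code can check the flag:

```python
import mwparserfromhell
from mwparserfromhell.parser import use_c

wikicode = mwparserfromhell.parse("{{foo|bar}}")
print(use_c)  # True only if the C tokenizer extension imported successfully
```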
@@ -21,24 +21,34 @@

 from . import tokens
 from .errors import ParserError
-from ..nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity, Tag,
-                     Template, Text, Wikilink)
+from ..nodes import (
+    Argument,
+    Comment,
+    ExternalLink,
+    Heading,
+    HTMLEntity,
+    Tag,
+    Template,
+    Text,
+    Wikilink,
+)
 from ..nodes.extras import Attribute, Parameter
 from ..smart_list import SmartList
 from ..wikicode import Wikicode

 __all__ = ["Builder"]

-_HANDLERS = {
-    tokens.Text: lambda self, token: Text(token.text)
-}
+_HANDLERS = {tokens.Text: lambda self, token: Text(token.text)}

+
 def _add_handler(token_type):
     """Create a decorator that adds a handler function to the lookup table."""
+
     def decorator(func):
         """Add a handler function to the lookup table."""
         _HANDLERS[token_type] = func
         return func
+
     return decorator
@@ -84,8 +94,9 @@ class Builder:
                 key = self._pop()
                 showkey = True
                 self._push()
-            elif isinstance(token, (tokens.TemplateParamSeparator,
-                                    tokens.TemplateClose)):
+            elif isinstance(
+                token, (tokens.TemplateParamSeparator, tokens.TemplateClose)
+            ):
                 self._tokens.append(token)
                 value = self._pop()
                 if key is None:
@@ -167,10 +178,17 @@ class Builder:
                 self._push()
             elif isinstance(token, tokens.ExternalLinkClose):
                 if url is not None:
-                    return ExternalLink(url, self._pop(), brackets=brackets,
-                                        suppress_space=suppress_space is True)
-                return ExternalLink(self._pop(), brackets=brackets,
-                                    suppress_space=suppress_space is True)
+                    return ExternalLink(
+                        url,
+                        self._pop(),
+                        brackets=brackets,
+                        suppress_space=suppress_space is True,
+                    )
+                return ExternalLink(
+                    self._pop(),
+                    brackets=brackets,
+                    suppress_space=suppress_space is True,
+                )
             else:
                 self._write(self._handle_token(token))
         raise ParserError("_handle_external_link() missed a close token")
@@ -184,8 +202,9 @@ class Builder:
             if isinstance(token, tokens.HTMLEntityHex):
                 text = self._tokens.pop()
                 self._tokens.pop()  # Remove HTMLEntityEnd
-                return HTMLEntity(text.text, named=False, hexadecimal=True,
-                                  hex_char=token.char)
+                return HTMLEntity(
+                    text.text, named=False, hexadecimal=True, hex_char=token.char
+                )
             self._tokens.pop()  # Remove HTMLEntityEnd
             return HTMLEntity(token.text, named=False, hexadecimal=False)
         self._tokens.pop()  # Remove HTMLEntityEnd
@@ -227,15 +246,23 @@ class Builder:
                 self._push()
             elif isinstance(token, tokens.TagAttrQuote):
                 quotes = token.char
-            elif isinstance(token, (tokens.TagAttrStart, tokens.TagCloseOpen,
-                                    tokens.TagCloseSelfclose)):
+            elif isinstance(
+                token,
+                (tokens.TagAttrStart, tokens.TagCloseOpen, tokens.TagCloseSelfclose),
+            ):
                 self._tokens.append(token)
                 if name:
                     value = self._pop()
                 else:
                     name, value = self._pop(), None
-                return Attribute(name, value, quotes, start.pad_first,
-                                 start.pad_before_eq, start.pad_after_eq)
+                return Attribute(
+                    name,
+                    value,
+                    quotes,
+                    start.pad_first,
+                    start.pad_before_eq,
+                    start.pad_after_eq,
+                )
             else:
                 self._write(self._handle_token(token))
         raise ParserError("_handle_attribute() missed a close token")
@@ -271,9 +298,19 @@ class Builder:
                 else:
                     self_closing = False
                     closing_tag = self._pop()
-                return Tag(tag, contents, attrs, wiki_markup, self_closing,
-                           invalid, implicit, padding, closing_tag,
-                           wiki_style_separator, closing_wiki_markup)
+                return Tag(
+                    tag,
+                    contents,
+                    attrs,
+                    wiki_markup,
+                    self_closing,
+                    invalid,
+                    implicit,
+                    padding,
+                    closing_tag,
+                    wiki_style_separator,
+                    closing_wiki_markup,
+                )
             else:
                 self._write(self._handle_token(token))
         raise ParserError("_handle_tag() missed a close token")
@@ -116,21 +116,21 @@ Aggregate contexts:

 # Local contexts:

-TEMPLATE_NAME =        1 << 0
-TEMPLATE_PARAM_KEY =   1 << 1
+TEMPLATE_NAME = 1 << 0
+TEMPLATE_PARAM_KEY = 1 << 1
 TEMPLATE_PARAM_VALUE = 1 << 2
 TEMPLATE = TEMPLATE_NAME + TEMPLATE_PARAM_KEY + TEMPLATE_PARAM_VALUE

-ARGUMENT_NAME =    1 << 3
+ARGUMENT_NAME = 1 << 3
 ARGUMENT_DEFAULT = 1 << 4
 ARGUMENT = ARGUMENT_NAME + ARGUMENT_DEFAULT

 WIKILINK_TITLE = 1 << 5
-WIKILINK_TEXT =  1 << 6
+WIKILINK_TEXT = 1 << 6
 WIKILINK = WIKILINK_TITLE + WIKILINK_TEXT

-EXT_LINK_URI =    1 << 7
-EXT_LINK_TITLE =  1 << 8
+EXT_LINK_URI = 1 << 7
+EXT_LINK_TITLE = 1 << 8
 EXT_LINK = EXT_LINK_URI + EXT_LINK_TITLE

 HEADING_LEVEL_1 = 1 << 9
@@ -139,42 +139,61 @@ HEADING_LEVEL_3 = 1 << 11
 HEADING_LEVEL_4 = 1 << 12
 HEADING_LEVEL_5 = 1 << 13
 HEADING_LEVEL_6 = 1 << 14
-HEADING = (HEADING_LEVEL_1 + HEADING_LEVEL_2 + HEADING_LEVEL_3 +
-           HEADING_LEVEL_4 + HEADING_LEVEL_5 + HEADING_LEVEL_6)
-
-TAG_OPEN =  1 << 15
-TAG_ATTR =  1 << 16
-TAG_BODY =  1 << 17
+HEADING = (
+    HEADING_LEVEL_1
+    + HEADING_LEVEL_2
+    + HEADING_LEVEL_3
+    + HEADING_LEVEL_4
+    + HEADING_LEVEL_5
+    + HEADING_LEVEL_6
+)
+
+TAG_OPEN = 1 << 15
+TAG_ATTR = 1 << 16
+TAG_BODY = 1 << 17
 TAG_CLOSE = 1 << 18
 TAG = TAG_OPEN + TAG_ATTR + TAG_BODY + TAG_CLOSE

-STYLE_ITALICS =     1 << 19
-STYLE_BOLD =        1 << 20
-STYLE_PASS_AGAIN =  1 << 21
-STYLE_SECOND_PASS = 1 << 22
+STYLE_ITALICS = 1 << 19
+STYLE_BOLD = 1 << 20
+STYLE_PASS_AGAIN = 1 << 21
+STYLE_SECOND_PASS = 1 << 22
 STYLE = STYLE_ITALICS + STYLE_BOLD + STYLE_PASS_AGAIN + STYLE_SECOND_PASS

 DL_TERM = 1 << 23

-HAS_TEXT =       1 << 24
-FAIL_ON_TEXT =   1 << 25
-FAIL_NEXT =      1 << 26
+HAS_TEXT = 1 << 24
+FAIL_ON_TEXT = 1 << 25
+FAIL_NEXT = 1 << 26
 FAIL_ON_LBRACE = 1 << 27
 FAIL_ON_RBRACE = 1 << 28
 FAIL_ON_EQUALS = 1 << 29
-HAS_TEMPLATE =   1 << 30
-SAFETY_CHECK = (HAS_TEXT + FAIL_ON_TEXT + FAIL_NEXT + FAIL_ON_LBRACE +
-                FAIL_ON_RBRACE + FAIL_ON_EQUALS + HAS_TEMPLATE)
-
-TABLE_OPEN =       1 << 31
-TABLE_CELL_OPEN =  1 << 32
+HAS_TEMPLATE = 1 << 30
+SAFETY_CHECK = (
+    HAS_TEXT
+    + FAIL_ON_TEXT
+    + FAIL_NEXT
+    + FAIL_ON_LBRACE
+    + FAIL_ON_RBRACE
+    + FAIL_ON_EQUALS
+    + HAS_TEMPLATE
+)
+
+TABLE_OPEN = 1 << 31
+TABLE_CELL_OPEN = 1 << 32
 TABLE_CELL_STYLE = 1 << 33
-TABLE_ROW_OPEN =   1 << 34
-TABLE_TD_LINE =    1 << 35
-TABLE_TH_LINE =    1 << 36
+TABLE_ROW_OPEN = 1 << 34
+TABLE_TD_LINE = 1 << 35
+TABLE_TH_LINE = 1 << 36
 TABLE_CELL_LINE_CONTEXTS = TABLE_TD_LINE + TABLE_TH_LINE + TABLE_CELL_STYLE
-TABLE = (TABLE_OPEN + TABLE_CELL_OPEN + TABLE_CELL_STYLE + TABLE_ROW_OPEN +
-         TABLE_TD_LINE + TABLE_TH_LINE)
+TABLE = (
+    TABLE_OPEN
+    + TABLE_CELL_OPEN
+    + TABLE_CELL_STYLE
+    + TABLE_ROW_OPEN
+    + TABLE_TD_LINE
+    + TABLE_TH_LINE
+)

 HTML_ENTITY = 1 << 37
@@ -184,14 +203,20 @@ GL_HEADING = 1 << 0

 # Aggregate contexts:

-FAIL = (TEMPLATE + ARGUMENT + WIKILINK + EXT_LINK_TITLE + HEADING + TAG +
-        STYLE + TABLE)
-UNSAFE = (TEMPLATE_NAME + WIKILINK_TITLE + EXT_LINK_TITLE +
-          TEMPLATE_PARAM_KEY + ARGUMENT_NAME + TAG_CLOSE)
+FAIL = TEMPLATE + ARGUMENT + WIKILINK + EXT_LINK_TITLE + HEADING + TAG + STYLE + TABLE
+UNSAFE = (
+    TEMPLATE_NAME
+    + WIKILINK_TITLE
+    + EXT_LINK_TITLE
+    + TEMPLATE_PARAM_KEY
+    + ARGUMENT_NAME
+    + TAG_CLOSE
+)
 DOUBLE = TEMPLATE_PARAM_KEY + TAG_CLOSE + TABLE_ROW_OPEN
 NO_WIKILINKS = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK_TITLE + EXT_LINK_URI
 NO_EXT_LINKS = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK_TITLE + EXT_LINK

+
 def describe(context):
     """Return a string describing the given context value, for debugging."""
     flags = []
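Each context here is a distinct bit, so aggregates like TEMPLATE or SAFETY_CHECK are sums of disjoint powers of two, and membership testing is a bitwise AND. Using the values straight from this hunk:

```python
TEMPLATE_NAME = 1 << 0
TEMPLATE_PARAM_KEY = 1 << 1
TEMPLATE_PARAM_VALUE = 1 << 2
TEMPLATE = TEMPLATE_NAME + TEMPLATE_PARAM_KEY + TEMPLATE_PARAM_VALUE

context = TEMPLATE_PARAM_KEY
assert context & TEMPLATE           # somewhere inside a template...
assert not context & TEMPLATE_NAME  # ...but not in its name part
```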
@@ -1,6 +1,6 @@ | |||||
/* | /* | ||||
* avl_tree.h - intrusive, nonrecursive AVL tree data structure (self-balancing | * avl_tree.h - intrusive, nonrecursive AVL tree data structure (self-balancing | ||||
* binary search tree), header file | |||||
* binary search tree), header file | |||||
* | * | ||||
* Written in 2014-2016 by Eric Biggers <ebiggers3@gmail.com> | * Written in 2014-2016 by Eric Biggers <ebiggers3@gmail.com> | ||||
* Slight changes for compatibility by Ben Kurtovic <ben.kurtovic@gmail.com> | * Slight changes for compatibility by Ben Kurtovic <ben.kurtovic@gmail.com> | ||||
@@ -24,60 +24,60 @@ | |||||
#include <stddef.h> | #include <stddef.h> | ||||
#if !defined(_MSC_VER) || (_MSC_VER >= 1600) | #if !defined(_MSC_VER) || (_MSC_VER >= 1600) | ||||
#include <stdint.h> | |||||
# include <stdint.h> | |||||
#endif | #endif | ||||
#ifdef __GNUC__ | #ifdef __GNUC__ | ||||
# define AVL_INLINE inline __attribute__((always_inline)) | |||||
# define AVL_INLINE inline __attribute__((always_inline)) | |||||
#elif defined(_MSC_VER) && (_MSC_VER < 1900) | #elif defined(_MSC_VER) && (_MSC_VER < 1900) | ||||
# define AVL_INLINE __inline | |||||
# define AVL_INLINE __inline | |||||
#else | #else | ||||
# define AVL_INLINE inline | |||||
# define AVL_INLINE inline | |||||
#endif | #endif | ||||
/* Node in an AVL tree. Embed this in some other data structure. */ | /* Node in an AVL tree. Embed this in some other data structure. */ | ||||
struct avl_tree_node { | struct avl_tree_node { | ||||
/* Pointer to left child or NULL */ | |||||
struct avl_tree_node *left; | |||||
/* Pointer to left child or NULL */ | |||||
struct avl_tree_node *left; | |||||
/* Pointer to right child or NULL */ | |||||
struct avl_tree_node *right; | |||||
/* Pointer to right child or NULL */ | |||||
struct avl_tree_node *right; | |||||
/* Pointer to parent combined with the balance factor. This saves 4 or | |||||
* 8 bytes of memory depending on the CPU architecture. | |||||
* | |||||
* Low 2 bits: One greater than the balance factor of this subtree, | |||||
* which is equal to height(right) - height(left). The mapping is: | |||||
* | |||||
* 00 => -1 | |||||
* 01 => 0 | |||||
* 10 => +1 | |||||
* 11 => undefined | |||||
* | |||||
* The rest of the bits are the pointer to the parent node. It must be | |||||
* 4-byte aligned, and it will be NULL if this is the root node and | |||||
* therefore has no parent. */ | |||||
uintptr_t parent_balance; | |||||
/* Pointer to parent combined with the balance factor. This saves 4 or | |||||
* 8 bytes of memory depending on the CPU architecture. | |||||
* | |||||
* Low 2 bits: One greater than the balance factor of this subtree, | |||||
* which is equal to height(right) - height(left). The mapping is: | |||||
* | |||||
* 00 => -1 | |||||
* 01 => 0 | |||||
* 10 => +1 | |||||
* 11 => undefined | |||||
* | |||||
* The rest of the bits are the pointer to the parent node. It must be | |||||
* 4-byte aligned, and it will be NULL if this is the root node and | |||||
* therefore has no parent. */ | |||||
uintptr_t parent_balance; | |||||
}; | }; | ||||
/* Cast an AVL tree node to the containing data structure. */ | /* Cast an AVL tree node to the containing data structure. */ | ||||
#define avl_tree_entry(entry, type, member) \ | |||||
((type*) ((char *)(entry) - offsetof(type, member))) | |||||
#define avl_tree_entry(entry, type, member) \ | |||||
((type *) ((char *) (entry) -offsetof(type, member))) | |||||
/* Returns a pointer to the parent of the specified AVL tree node, or NULL if it | /* Returns a pointer to the parent of the specified AVL tree node, or NULL if it | ||||
* is already the root of the tree. */ | * is already the root of the tree. */ | ||||
static AVL_INLINE struct avl_tree_node * | static AVL_INLINE struct avl_tree_node * | ||||
avl_get_parent(const struct avl_tree_node *node) | avl_get_parent(const struct avl_tree_node *node) | ||||
{ | { | ||||
return (struct avl_tree_node *)(node->parent_balance & ~3); | |||||
return (struct avl_tree_node *) (node->parent_balance & ~3); | |||||
} | } | ||||
/* Marks the specified AVL tree node as unlinked from any tree. */ | /* Marks the specified AVL tree node as unlinked from any tree. */ | ||||
static AVL_INLINE void | static AVL_INLINE void | ||||
avl_tree_node_set_unlinked(struct avl_tree_node *node) | avl_tree_node_set_unlinked(struct avl_tree_node *node) | ||||
{ | { | ||||
node->parent_balance = (uintptr_t)node; | |||||
node->parent_balance = (uintptr_t) node; | |||||
} | } | ||||
/* Returns true iff the specified AVL tree node has been marked with | /* Returns true iff the specified AVL tree node has been marked with | ||||
@@ -86,30 +86,29 @@ avl_tree_node_set_unlinked(struct avl_tree_node *node) | |||||
static AVL_INLINE int | static AVL_INLINE int | ||||
avl_tree_node_is_unlinked(const struct avl_tree_node *node) | avl_tree_node_is_unlinked(const struct avl_tree_node *node) | ||||
{ | { | ||||
return node->parent_balance == (uintptr_t)node; | |||||
return node->parent_balance == (uintptr_t) node; | |||||
} | } | ||||
/* (Internal use only) */ | /* (Internal use only) */ | ||||
extern void | |||||
avl_tree_rebalance_after_insert(struct avl_tree_node **root_ptr, | |||||
struct avl_tree_node *inserted); | |||||
extern void avl_tree_rebalance_after_insert(struct avl_tree_node **root_ptr, | |||||
struct avl_tree_node *inserted); | |||||
/* | /* | ||||
* Looks up an item in the specified AVL tree. | * Looks up an item in the specified AVL tree. | ||||
* | * | ||||
* @root | * @root | ||||
* Pointer to the root of the AVL tree. (This can be NULL --- that just | |||||
* means the tree is empty.) | |||||
* Pointer to the root of the AVL tree. (This can be NULL --- that just | |||||
* means the tree is empty.) | |||||
* | * | ||||
* @cmp_ctx | * @cmp_ctx | ||||
* First argument to pass to the comparison callback. This generally | |||||
* should be a pointer to an object equal to the one being searched for. | |||||
* First argument to pass to the comparison callback. This generally | |||||
* should be a pointer to an object equal to the one being searched for. | |||||
* | * | ||||
* @cmp | * @cmp | ||||
* Comparison callback. Must return < 0, 0, or > 0 if the first argument | |||||
* is less than, equal to, or greater than the second argument, | |||||
* respectively. The first argument will be @cmp_ctx and the second | |||||
* argument will be a pointer to the AVL tree node of an item in the tree. | |||||
* Comparison callback. Must return < 0, 0, or > 0 if the first argument | |||||
* is less than, equal to, or greater than the second argument, | |||||
* respectively. The first argument will be @cmp_ctx and the second | |||||
* argument will be a pointer to the AVL tree node of an item in the tree. | |||||
* | * | ||||
* Returns a pointer to the AVL tree node of the resulting item, or NULL if the | * Returns a pointer to the AVL tree node of the resulting item, or NULL if the | ||||
* item was not found. | * item was not found. | ||||
@@ -117,48 +116,49 @@ avl_tree_rebalance_after_insert(struct avl_tree_node **root_ptr, | |||||
* Example: | * Example: | ||||
* | * | ||||
* struct int_wrapper { | * struct int_wrapper { | ||||
* int data; | |||||
* struct avl_tree_node index_node; | |||||
* int data; | |||||
* struct avl_tree_node index_node; | |||||
* }; | * }; | ||||
* | * | ||||
* static int _avl_cmp_int_to_node(const void *intptr, | * static int _avl_cmp_int_to_node(const void *intptr, | ||||
* const struct avl_tree_node *nodeptr) | |||||
* const struct avl_tree_node *nodeptr) | |||||
* { | * { | ||||
* int n1 = *(const int *)intptr; | |||||
* int n2 = avl_tree_entry(nodeptr, struct int_wrapper, index_node)->data; | |||||
* if (n1 < n2) | |||||
* return -1; | |||||
* else if (n1 > n2) | |||||
* return 1; | |||||
* else | |||||
* return 0; | |||||
* int n1 = *(const int *)intptr; | |||||
* int n2 = avl_tree_entry(nodeptr, struct int_wrapper, index_node)->data; | |||||
* if (n1 < n2) | |||||
* return -1; | |||||
* else if (n1 > n2) | |||||
* return 1; | |||||
* else | |||||
* return 0; | |||||
* } | * } | ||||
* | * | ||||
* bool contains_int(struct avl_tree_node *root, int n) | * bool contains_int(struct avl_tree_node *root, int n) | ||||
* { | * { | ||||
* struct avl_tree_node *result; | |||||
* struct avl_tree_node *result; | |||||
* | * | ||||
* result = avl_tree_lookup(root, &n, _avl_cmp_int_to_node); | |||||
* return result ? true : false; | |||||
* result = avl_tree_lookup(root, &n, _avl_cmp_int_to_node); | |||||
* return result ? true : false; | |||||
* } | * } | ||||
*/ | */ | ||||
static AVL_INLINE struct avl_tree_node * | static AVL_INLINE struct avl_tree_node * | ||||
avl_tree_lookup(const struct avl_tree_node *root, | avl_tree_lookup(const struct avl_tree_node *root, | ||||
const void *cmp_ctx, | |||||
int (*cmp)(const void *, const struct avl_tree_node *)) | |||||
const void *cmp_ctx, | |||||
int (*cmp)(const void *, const struct avl_tree_node *)) | |||||
{ | { | ||||
const struct avl_tree_node *cur = root; | |||||
const struct avl_tree_node *cur = root; | |||||
while (cur) { | |||||
int res = (*cmp)(cmp_ctx, cur); | |||||
if (res < 0) | |||||
cur = cur->left; | |||||
else if (res > 0) | |||||
cur = cur->right; | |||||
else | |||||
break; | |||||
} | |||||
return (struct avl_tree_node*)cur; | |||||
while (cur) { | |||||
int res = (*cmp)(cmp_ctx, cur); | |||||
if (res < 0) { | |||||
cur = cur->left; | |||||
} else if (res > 0) { | |||||
cur = cur->right; | |||||
} else { | |||||
break; | |||||
} | |||||
} | |||||
return (struct avl_tree_node *) cur; | |||||
} | } | ||||
/* Same as avl_tree_lookup(), but uses a more specific type for the comparison | /* Same as avl_tree_lookup(), but uses a more specific type for the comparison | ||||
@@ -167,44 +167,45 @@ avl_tree_lookup(const struct avl_tree_node *root, | |||||
* embedded 'struct avl_tree_node'. */ | * embedded 'struct avl_tree_node'. */ | ||||
static AVL_INLINE struct avl_tree_node * | static AVL_INLINE struct avl_tree_node * | ||||
avl_tree_lookup_node(const struct avl_tree_node *root, | avl_tree_lookup_node(const struct avl_tree_node *root, | ||||
const struct avl_tree_node *node, | |||||
int (*cmp)(const struct avl_tree_node *, | |||||
const struct avl_tree_node *)) | |||||
const struct avl_tree_node *node, | |||||
int (*cmp)(const struct avl_tree_node *, | |||||
const struct avl_tree_node *)) | |||||
{ | { | ||||
const struct avl_tree_node *cur = root; | |||||
const struct avl_tree_node *cur = root; | |||||
while (cur) { | |||||
int res = (*cmp)(node, cur); | |||||
if (res < 0) | |||||
cur = cur->left; | |||||
else if (res > 0) | |||||
cur = cur->right; | |||||
else | |||||
break; | |||||
} | |||||
return (struct avl_tree_node*)cur; | |||||
while (cur) { | |||||
int res = (*cmp)(node, cur); | |||||
if (res < 0) { | |||||
cur = cur->left; | |||||
} else if (res > 0) { | |||||
cur = cur->right; | |||||
} else { | |||||
break; | |||||
} | |||||
} | |||||
return (struct avl_tree_node *) cur; | |||||
} | } | ||||
/* | /* | ||||
* Inserts an item into the specified AVL tree. | * Inserts an item into the specified AVL tree. | ||||
* | * | ||||
* @root_ptr | * @root_ptr | ||||
* Location of the AVL tree's root pointer. Indirection is needed because | |||||
* the root node may change as a result of rotations caused by the | |||||
* insertion. Initialize *root_ptr to NULL for an empty tree. | |||||
* Location of the AVL tree's root pointer. Indirection is needed because | |||||
* the root node may change as a result of rotations caused by the | |||||
* insertion. Initialize *root_ptr to NULL for an empty tree. | |||||
* | * | ||||
* @item | * @item | ||||
* Pointer to the `struct avl_tree_node' embedded in the item to insert. | |||||
* No members in it need be pre-initialized, although members in the | |||||
* containing structure should be pre-initialized so that @cmp can use them | |||||
* in comparisons. | |||||
* Pointer to the `struct avl_tree_node' embedded in the item to insert. | |||||
* No members in it need be pre-initialized, although members in the | |||||
* containing structure should be pre-initialized so that @cmp can use them | |||||
* in comparisons. | |||||
* | * | ||||
* @cmp | * @cmp | ||||
* Comparison callback. Must return < 0, 0, or > 0 if the first argument | |||||
* is less than, equal to, or greater than the second argument, | |||||
* respectively. The first argument will be @item and the second | |||||
* argument will be a pointer to an AVL tree node embedded in some | |||||
* previously-inserted item to which @item is being compared. | |||||
* Comparison callback. Must return < 0, 0, or > 0 if the first argument | |||||
* is less than, equal to, or greater than the second argument, | |||||
* respectively. The first argument will be @item and the second | |||||
* argument will be a pointer to an AVL tree node embedded in some | |||||
* previously-inserted item to which @item is being compared. | |||||
* | * | ||||
* If no item in the tree is comparatively equal (via @cmp) to @item, inserts | * If no item in the tree is comparatively equal (via @cmp) to @item, inserts | ||||
* @item and returns NULL. Otherwise does nothing and returns a pointer to the | * @item and returns NULL. Otherwise does nothing and returns a pointer to the | ||||
@@ -214,150 +215,138 @@ avl_tree_lookup_node(const struct avl_tree_node *root, | |||||
* Example: | * Example: | ||||
* | * | ||||
* struct int_wrapper { | * struct int_wrapper { | ||||
* int data; | |||||
* struct avl_tree_node index_node; | |||||
* int data; | |||||
* struct avl_tree_node index_node; | |||||
* }; | * }; | ||||
* | * | ||||
* #define GET_DATA(i) avl_tree_entry((i), struct int_wrapper, index_node)->data | * #define GET_DATA(i) avl_tree_entry((i), struct int_wrapper, index_node)->data | ||||
* | * | ||||
* static int _avl_cmp_ints(const struct avl_tree_node *node1, | * static int _avl_cmp_ints(const struct avl_tree_node *node1, | ||||
* const struct avl_tree_node *node2) | |||||
* const struct avl_tree_node *node2) | |||||
* { | * { | ||||
* int n1 = GET_DATA(node1); | |||||
* int n2 = GET_DATA(node2); | |||||
* if (n1 < n2) | |||||
* return -1; | |||||
* else if (n1 > n2) | |||||
* return 1; | |||||
* else | |||||
* return 0; | |||||
* int n1 = GET_DATA(node1); | |||||
* int n2 = GET_DATA(node2); | |||||
* if (n1 < n2) | |||||
* return -1; | |||||
* else if (n1 > n2) | |||||
* return 1; | |||||
* else | |||||
* return 0; | |||||
* } | * } | ||||
* | * | ||||
* bool insert_int(struct avl_tree_node **root_ptr, int data) | * bool insert_int(struct avl_tree_node **root_ptr, int data) | ||||
* { | * { | ||||
* struct int_wrapper *i = malloc(sizeof(struct int_wrapper)); | |||||
* i->data = data; | |||||
* if (avl_tree_insert(root_ptr, &i->index_node, _avl_cmp_ints)) { | |||||
* // Duplicate. | |||||
* free(i); | |||||
* return false; | |||||
* } | |||||
* return true; | |||||
* struct int_wrapper *i = malloc(sizeof(struct int_wrapper)); | |||||
* i->data = data; | |||||
* if (avl_tree_insert(root_ptr, &i->index_node, _avl_cmp_ints)) { | |||||
* // Duplicate. | |||||
* free(i); | |||||
* return false; | |||||
* } | |||||
* return true; | |||||
* } | * } | ||||
*/ | */ | ||||
static AVL_INLINE struct avl_tree_node * | static AVL_INLINE struct avl_tree_node * | ||||
avl_tree_insert(struct avl_tree_node **root_ptr, | avl_tree_insert(struct avl_tree_node **root_ptr, | ||||
struct avl_tree_node *item, | |||||
int (*cmp)(const struct avl_tree_node *, | |||||
const struct avl_tree_node *)) | |||||
struct avl_tree_node *item, | |||||
int (*cmp)(const struct avl_tree_node *, const struct avl_tree_node *)) | |||||
{ | { | ||||
struct avl_tree_node **cur_ptr = root_ptr, *cur = NULL; | |||||
int res; | |||||
struct avl_tree_node **cur_ptr = root_ptr, *cur = NULL; | |||||
int res; | |||||
while (*cur_ptr) { | |||||
cur = *cur_ptr; | |||||
res = (*cmp)(item, cur); | |||||
if (res < 0) | |||||
cur_ptr = &cur->left; | |||||
else if (res > 0) | |||||
cur_ptr = &cur->right; | |||||
else | |||||
return cur; | |||||
} | |||||
*cur_ptr = item; | |||||
item->parent_balance = (uintptr_t)cur | 1; | |||||
avl_tree_rebalance_after_insert(root_ptr, item); | |||||
return NULL; | |||||
while (*cur_ptr) { | |||||
cur = *cur_ptr; | |||||
res = (*cmp)(item, cur); | |||||
if (res < 0) { | |||||
cur_ptr = &cur->left; | |||||
} else if (res > 0) { | |||||
cur_ptr = &cur->right; | |||||
} else { | |||||
return cur; | |||||
} | |||||
} | |||||
*cur_ptr = item; | |||||
item->parent_balance = (uintptr_t) cur | 1; | |||||
avl_tree_rebalance_after_insert(root_ptr, item); | |||||
return NULL; | |||||
} | } | ||||
/* Removes an item from the specified AVL tree. | /* Removes an item from the specified AVL tree. | ||||
* See implementation for details. */ | * See implementation for details. */ | ||||
extern void | |||||
avl_tree_remove(struct avl_tree_node **root_ptr, struct avl_tree_node *node); | |||||
extern void avl_tree_remove(struct avl_tree_node **root_ptr, | |||||
struct avl_tree_node *node); | |||||
/* Nonrecursive AVL tree traversal functions */ | /* Nonrecursive AVL tree traversal functions */ | ||||
extern struct avl_tree_node * | |||||
avl_tree_first_in_order(const struct avl_tree_node *root); | |||||
extern struct avl_tree_node *avl_tree_first_in_order(const struct avl_tree_node *root); | |||||
extern struct avl_tree_node * | |||||
avl_tree_last_in_order(const struct avl_tree_node *root); | |||||
extern struct avl_tree_node *avl_tree_last_in_order(const struct avl_tree_node *root); | |||||
extern struct avl_tree_node * | |||||
avl_tree_next_in_order(const struct avl_tree_node *node); | |||||
extern struct avl_tree_node *avl_tree_next_in_order(const struct avl_tree_node *node); | |||||
extern struct avl_tree_node * | |||||
avl_tree_prev_in_order(const struct avl_tree_node *node); | |||||
extern struct avl_tree_node *avl_tree_prev_in_order(const struct avl_tree_node *node); | |||||
extern struct avl_tree_node * | extern struct avl_tree_node * | ||||
avl_tree_first_in_postorder(const struct avl_tree_node *root); | avl_tree_first_in_postorder(const struct avl_tree_node *root); | ||||
extern struct avl_tree_node * | extern struct avl_tree_node * | ||||
avl_tree_next_in_postorder(const struct avl_tree_node *prev, | avl_tree_next_in_postorder(const struct avl_tree_node *prev, | ||||
const struct avl_tree_node *prev_parent); | |||||
const struct avl_tree_node *prev_parent); | |||||
/* | /* | ||||
* Iterate through the nodes in an AVL tree in sorted order. | * Iterate through the nodes in an AVL tree in sorted order. | ||||
* You may not modify the tree during the iteration. | * You may not modify the tree during the iteration. | ||||
* | * | ||||
* @child_struct | * @child_struct | ||||
* Variable that will receive a pointer to each struct inserted into the | |||||
* tree. | |||||
* Variable that will receive a pointer to each struct inserted into the | |||||
* tree. | |||||
* @root | * @root | ||||
* Root of the AVL tree. | |||||
* Root of the AVL tree. | |||||
* @struct_name | * @struct_name | ||||
* Type of *child_struct. | |||||
* Type of *child_struct. | |||||
* @struct_member | * @struct_member | ||||
* Member of @struct_name type that is the AVL tree node. | |||||
* Member of @struct_name type that is the AVL tree node. | |||||
* | * | ||||
* Example: | * Example: | ||||
* | * | ||||
* struct int_wrapper { | * struct int_wrapper { | ||||
* int data; | |||||
* struct avl_tree_node index_node; | |||||
* int data; | |||||
* struct avl_tree_node index_node; | |||||
* }; | * }; | ||||
* | * | ||||
* void print_ints(struct avl_tree_node *root) | * void print_ints(struct avl_tree_node *root) | ||||
* { | * { | ||||
* struct int_wrapper *i; | |||||
* struct int_wrapper *i; | |||||
* | * | ||||
* avl_tree_for_each_in_order(i, root, struct int_wrapper, index_node) | |||||
* printf("%d\n", i->data); | |||||
* avl_tree_for_each_in_order(i, root, struct int_wrapper, index_node) | |||||
* printf("%d\n", i->data); | |||||
* } | * } | ||||
*/ | */ | ||||
#define avl_tree_for_each_in_order(child_struct, root, \ | |||||
struct_name, struct_member) \ | |||||
for (struct avl_tree_node *_cur = \ | |||||
avl_tree_first_in_order(root); \ | |||||
_cur && ((child_struct) = \ | |||||
avl_tree_entry(_cur, struct_name, \ | |||||
struct_member), 1); \ | |||||
_cur = avl_tree_next_in_order(_cur)) | |||||
#define avl_tree_for_each_in_order(child_struct, root, struct_name, struct_member) \ | |||||
for (struct avl_tree_node *_cur = avl_tree_first_in_order(root); \ | |||||
_cur && \ | |||||
((child_struct) = avl_tree_entry(_cur, struct_name, struct_member), 1); \ | |||||
_cur = avl_tree_next_in_order(_cur)) | |||||
/* | /* | ||||
* Like avl_tree_for_each_in_order(), but uses the reverse order. | * Like avl_tree_for_each_in_order(), but uses the reverse order. | ||||
*/ | */ | ||||
#define avl_tree_for_each_in_reverse_order(child_struct, root, \ | |||||
struct_name, struct_member) \ | |||||
for (struct avl_tree_node *_cur = \ | |||||
avl_tree_last_in_order(root); \ | |||||
_cur && ((child_struct) = \ | |||||
avl_tree_entry(_cur, struct_name, \ | |||||
struct_member), 1); \ | |||||
_cur = avl_tree_prev_in_order(_cur)) | |||||
#define avl_tree_for_each_in_reverse_order( \ | |||||
child_struct, root, struct_name, struct_member) \ | |||||
for (struct avl_tree_node *_cur = avl_tree_last_in_order(root); \ | |||||
_cur && \ | |||||
((child_struct) = avl_tree_entry(_cur, struct_name, struct_member), 1); \ | |||||
_cur = avl_tree_prev_in_order(_cur)) | |||||
/* | /* | ||||
* Like avl_tree_for_each_in_order(), but iterates through the nodes in | * Like avl_tree_for_each_in_order(), but iterates through the nodes in | ||||
* postorder, so the current node may be deleted or freed. | * postorder, so the current node may be deleted or freed. | ||||
*/ | */ | ||||
#define avl_tree_for_each_in_postorder(child_struct, root, \ | |||||
struct_name, struct_member) \ | |||||
for (struct avl_tree_node *_cur = \ | |||||
avl_tree_first_in_postorder(root), *_parent; \ | |||||
_cur && ((child_struct) = \ | |||||
avl_tree_entry(_cur, struct_name, \ | |||||
struct_member), 1) \ | |||||
&& (_parent = avl_get_parent(_cur), 1); \ | |||||
_cur = avl_tree_next_in_postorder(_cur, _parent)) | |||||
#define avl_tree_for_each_in_postorder(child_struct, root, struct_name, struct_member) \ | |||||
for (struct avl_tree_node *_cur = avl_tree_first_in_postorder(root), *_parent; \ | |||||
_cur && \ | |||||
((child_struct) = avl_tree_entry(_cur, struct_name, struct_member), 1) && \ | |||||
(_parent = avl_get_parent(_cur), 1); \ | |||||
_cur = avl_tree_next_in_postorder(_cur, _parent)) | |||||
#endif /* _AVL_TREE_H_ */ | #endif /* _AVL_TREE_H_ */ |
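
The postorder guarantee above is exactly what makes in-loop teardown safe: children are visited before their parents, and the macro caches _parent before the body runs, so the body may free the current node. A minimal teardown sketch, reusing the struct int_wrapper type from the in-order example (free_ints itself is hypothetical):

    void free_ints(struct avl_tree_node *root)
    {
        struct int_wrapper *i;

        /* By the time a node is visited, both of its subtrees are
           already freed, and the iterator no longer needs the node. */
        avl_tree_for_each_in_postorder(i, root, struct int_wrapper, index_node)
            free(i);
    }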
@@ -23,55 +23,56 @@ SOFTWARE. | |||||
#pragma once | #pragma once | ||||
#ifndef PY_SSIZE_T_CLEAN | #ifndef PY_SSIZE_T_CLEAN | ||||
#define PY_SSIZE_T_CLEAN // See: https://docs.python.org/3/c-api/arg.html | |||||
# define PY_SSIZE_T_CLEAN // See: https://docs.python.org/3/c-api/arg.html | |||||
#endif | #endif | ||||
#include <Python.h> | #include <Python.h> | ||||
#include <structmember.h> | |||||
#include <bytesobject.h> | #include <bytesobject.h> | ||||
#include <structmember.h> | |||||
#include "avl_tree.h" | #include "avl_tree.h" | ||||
/* Compatibility macros */ | /* Compatibility macros */ | ||||
#ifndef uint64_t | #ifndef uint64_t | ||||
#define uint64_t unsigned PY_LONG_LONG | |||||
# define uint64_t unsigned PY_LONG_LONG | |||||
#endif | #endif | ||||
#define malloc PyObject_Malloc // XXX: yuck | |||||
#define malloc PyObject_Malloc // XXX: yuck | |||||
#define realloc PyObject_Realloc | #define realloc PyObject_Realloc | ||||
#define free PyObject_Free | #define free PyObject_Free | ||||
/* Unicode support macros */ | /* Unicode support macros */ | ||||
#define PyUnicode_FROM_SINGLE(chr) \ | |||||
#define PyUnicode_FROM_SINGLE(chr) \ | |||||
PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, &(chr), 1) | PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, &(chr), 1) | ||||
/* Error handling macros */ | /* Error handling macros */ | ||||
#define BAD_ROUTE self->route_state | |||||
#define BAD_ROUTE_CONTEXT self->route_context | |||||
#define FAIL_ROUTE(context) { \ | |||||
self->route_state = 1; \ | |||||
self->route_context = context; \ | |||||
} | |||||
#define RESET_ROUTE() self->route_state = 0 | |||||
#define BAD_ROUTE self->route_state | |||||
#define BAD_ROUTE_CONTEXT self->route_context | |||||
#define FAIL_ROUTE(context) \ | |||||
do { \ | |||||
self->route_state = 1; \ | |||||
self->route_context = context; \ | |||||
} while (0) | |||||
#define RESET_ROUTE() self->route_state = 0 | |||||
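
Wrapping FAIL_ROUTE in do { ... } while (0) is the standard multi-statement-macro fix: with the old brace-only body, the semicolon the caller writes after the expanded block terminates an enclosing if, orphaning any else. A sketch of the call shape that breaks (some_condition is a placeholder):

    if (some_condition)
        FAIL_ROUTE(context);   /* new form: expands to exactly one statement */
    else
        RESET_ROUTE();         /* old form: compile error, orphaned else */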
/* Shared globals */ | /* Shared globals */ | ||||
extern char** entitydefs; | |||||
extern char **entitydefs; | |||||
extern PyObject* NOARGS; | |||||
extern PyObject* definitions; | |||||
extern PyObject *NOARGS; | |||||
extern PyObject *definitions; | |||||
/* Structs */ | /* Structs */ | ||||
typedef struct { | typedef struct { | ||||
Py_ssize_t capacity; | Py_ssize_t capacity; | ||||
Py_ssize_t length; | Py_ssize_t length; | ||||
PyObject* object; | |||||
PyObject *object; | |||||
int kind; | int kind; | ||||
void* data; | |||||
void *data; | |||||
} Textbuffer; | } Textbuffer; | ||||
typedef struct { | typedef struct { | ||||
@@ -80,19 +81,19 @@ typedef struct { | |||||
} StackIdent; | } StackIdent; | ||||
struct Stack { | struct Stack { | ||||
PyObject* stack; | |||||
PyObject *stack; | |||||
uint64_t context; | uint64_t context; | ||||
Textbuffer* textbuffer; | |||||
Textbuffer *textbuffer; | |||||
StackIdent ident; | StackIdent ident; | ||||
struct Stack* next; | |||||
struct Stack *next; | |||||
}; | }; | ||||
typedef struct Stack Stack; | typedef struct Stack Stack; | ||||
typedef struct { | typedef struct { | ||||
PyObject* object; /* base PyUnicodeObject object */ | |||||
Py_ssize_t length; /* length of object, in code points */ | |||||
int kind; /* object's kind value */ | |||||
void* data; /* object's raw unicode buffer */ | |||||
PyObject *object; /* base PyUnicodeObject object */ | |||||
Py_ssize_t length; /* length of object, in code points */ | |||||
int kind; /* object's kind value */ | |||||
void *data; /* object's raw unicode buffer */ | |||||
} TokenizerInput; | } TokenizerInput; | ||||
typedef struct avl_tree_node avl_tree; | typedef struct avl_tree_node avl_tree; | ||||
@@ -104,13 +105,13 @@ typedef struct { | |||||
typedef struct { | typedef struct { | ||||
PyObject_HEAD | PyObject_HEAD | ||||
TokenizerInput text; /* text to tokenize */ | |||||
Stack* topstack; /* topmost stack */ | |||||
Py_ssize_t head; /* current position in text */ | |||||
int global; /* global context */ | |||||
int depth; /* stack recursion depth */ | |||||
int route_state; /* whether a BadRoute has been triggered */ | |||||
uint64_t route_context; /* context when the last BadRoute was triggered */ | |||||
avl_tree* bad_routes; /* stack idents for routes known to fail */ | |||||
int skip_style_tags; /* temp fix for the sometimes broken tag parser */ | |||||
TokenizerInput text; /* text to tokenize */ | |||||
Stack *topstack; /* topmost stack */ | |||||
Py_ssize_t head; /* current position in text */ | |||||
int global; /* global context */ | |||||
int depth; /* stack recursion depth */ | |||||
int route_state; /* whether a BadRoute has been triggered */ | |||||
uint64_t route_context; /* context when the last BadRoute was triggered */ | |||||
avl_tree *bad_routes; /* stack idents for routes known to fail */ | |||||
int skip_style_tags; /* temp fix for the sometimes broken tag parser */ | |||||
} Tokenizer; | } Tokenizer; |
@@ -89,11 +89,17 @@ SOFTWARE. | |||||
/* Aggregate contexts */ | /* Aggregate contexts */ | ||||
#define AGG_FAIL (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_HEADING | LC_TAG | LC_STYLE | LC_TABLE_OPEN) | |||||
#define AGG_UNSAFE (LC_TEMPLATE_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_TITLE | LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME) | |||||
#define AGG_DOUBLE (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE | LC_TABLE_ROW_OPEN) | |||||
#define AGG_NO_WIKILINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_URI) | |||||
#define AGG_NO_EXT_LINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK) | |||||
#define AGG_FAIL \ | |||||
(LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_HEADING | \ | |||||
LC_TAG | LC_STYLE | LC_TABLE_OPEN) | |||||
#define AGG_UNSAFE \ | |||||
(LC_TEMPLATE_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_TITLE | \ | |||||
LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME) | |||||
#define AGG_DOUBLE (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE | LC_TABLE_ROW_OPEN) | |||||
#define AGG_NO_WIKILINKS \ | |||||
(LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_URI) | |||||
#define AGG_NO_EXT_LINKS \ | |||||
(LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK) | |||||
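
The aggregates are plain unions of the single-bit LC_* flags, so parse routines test them with one bitwise AND against the current stack's context. A hypothetical guard of that shape:

    /* Treat "[[" as literal text where wikilinks cannot start. */
    if (self->topstack->context & AGG_NO_WIKILINKS) {
        return Tokenizer_emit_text(self, "[[");
    }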
/* Tag contexts */ | /* Tag contexts */ | ||||
@@ -27,7 +27,8 @@ SOFTWARE. | |||||
See the Python version for data sources. | See the Python version for data sources. | ||||
*/ | */ | ||||
static const char* URI_SCHEMES[] = { | |||||
// clang-format off | |||||
static const char *URI_SCHEMES[] = { | |||||
"bitcoin", | "bitcoin", | ||||
"ftp", | "ftp", | ||||
"ftps", | "ftps", | ||||
@@ -55,10 +56,10 @@ static const char* URI_SCHEMES[] = { | |||||
"urn", | "urn", | ||||
"worldwind", | "worldwind", | ||||
"xmpp", | "xmpp", | ||||
NULL, | |||||
NULL, | |||||
}; | }; | ||||
static const char* URI_SCHEMES_AUTHORITY_OPTIONAL[] = { | |||||
static const char *URI_SCHEMES_AUTHORITY_OPTIONAL[] = { | |||||
"bitcoin", | "bitcoin", | ||||
"geo", | "geo", | ||||
"magnet", | "magnet", | ||||
@@ -73,7 +74,7 @@ static const char* URI_SCHEMES_AUTHORITY_OPTIONAL[] = { | |||||
NULL, | NULL, | ||||
}; | }; | ||||
static const char* PARSER_BLACKLIST[] = { | |||||
static const char *PARSER_BLACKLIST[] = { | |||||
"categorytree", | "categorytree", | ||||
"ce", | "ce", | ||||
"chem", | "chem", | ||||
@@ -93,32 +94,32 @@ static const char* PARSER_BLACKLIST[] = { | |||||
"timeline", | "timeline", | ||||
NULL, | NULL, | ||||
}; | }; | ||||
// clang-format on | |||||
static const char* SINGLE[] = { | |||||
"br", "wbr", "hr", "meta", "link", "img", "li", "dt", "dd", "th", "td", | |||||
"tr", NULL | |||||
}; | |||||
static const char *SINGLE[] = { | |||||
"br", "wbr", "hr", "meta", "link", "img", "li", "dt", "dd", "th", "td", "tr", NULL}; | |||||
static const char* SINGLE_ONLY[] = { | |||||
"br", "wbr", "hr", "meta", "link", "img", NULL | |||||
}; | |||||
static const char *SINGLE_ONLY[] = {"br", "wbr", "hr", "meta", "link", "img", NULL}; | |||||
/* | /* | ||||
Convert a PyUnicodeObject to a lowercase ASCII char* array and store it in | Convert a PyUnicodeObject to a lowercase ASCII char* array and store it in | ||||
the second argument. The caller must free the return value when finished. | the second argument. The caller must free the return value when finished. | ||||
If the return value is NULL, the conversion failed and *string is not set. | If the return value is NULL, the conversion failed and *string is not set. | ||||
*/ | */ | ||||
static PyObject* unicode_to_lcase_ascii(PyObject *input, const char **string) | |||||
static PyObject * | |||||
unicode_to_lcase_ascii(PyObject *input, const char **string) | |||||
{ | { | ||||
PyObject *lower = PyObject_CallMethod(input, "lower", NULL), *bytes; | PyObject *lower = PyObject_CallMethod(input, "lower", NULL), *bytes; | ||||
if (!lower) | |||||
if (!lower) { | |||||
return NULL; | return NULL; | ||||
} | |||||
bytes = PyUnicode_AsASCIIString(lower); | bytes = PyUnicode_AsASCIIString(lower); | ||||
Py_DECREF(lower); | Py_DECREF(lower); | ||||
if (!bytes) { | if (!bytes) { | ||||
if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) | |||||
if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) { | |||||
PyErr_Clear(); | PyErr_Clear(); | ||||
} | |||||
return NULL; | return NULL; | ||||
} | } | ||||
*string = PyBytes_AS_STRING(bytes); | *string = PyBytes_AS_STRING(bytes); | ||||
@@ -128,14 +129,16 @@ static PyObject* unicode_to_lcase_ascii(PyObject *input, const char **string) | |||||
/* | /* | ||||
Return whether a PyUnicodeObject is in a list of lowercase ASCII strings. | Return whether a PyUnicodeObject is in a list of lowercase ASCII strings. | ||||
*/ | */ | ||||
static int unicode_in_string_list(PyObject *input, const char **list) | |||||
static int | |||||
unicode_in_string_list(PyObject *input, const char **list) | |||||
{ | { | ||||
const char *string; | const char *string; | ||||
PyObject *temp = unicode_to_lcase_ascii(input, &string); | PyObject *temp = unicode_to_lcase_ascii(input, &string); | ||||
int retval = 0; | int retval = 0; | ||||
if (!temp) | |||||
if (!temp) { | |||||
return 0; | return 0; | ||||
} | |||||
while (*list) { | while (*list) { | ||||
if (!strcmp(*(list++), string)) { | if (!strcmp(*(list++), string)) { | ||||
@@ -144,7 +147,7 @@ static int unicode_in_string_list(PyObject *input, const char **list) | |||||
} | } | ||||
} | } | ||||
end: | |||||
end: | |||||
Py_DECREF(temp); | Py_DECREF(temp); | ||||
return retval; | return retval; | ||||
} | } | ||||
@@ -152,7 +155,8 @@ static int unicode_in_string_list(PyObject *input, const char **list) | |||||
/* | /* | ||||
Return whether the given tag's contents should be passed to the parser. | Return whether the given tag's contents should be passed to the parser. | ||||
*/ | */ | ||||
int is_parsable(PyObject *tag) | |||||
int | |||||
is_parsable(PyObject *tag) | |||||
{ | { | ||||
return !unicode_in_string_list(tag, PARSER_BLACKLIST); | return !unicode_in_string_list(tag, PARSER_BLACKLIST); | ||||
} | } | ||||
@@ -160,7 +164,8 @@ int is_parsable(PyObject *tag) | |||||
/* | /* | ||||
Return whether or not the given tag can exist without a close tag. | Return whether or not the given tag can exist without a close tag. | ||||
*/ | */ | ||||
int is_single(PyObject *tag) | |||||
int | |||||
is_single(PyObject *tag) | |||||
{ | { | ||||
return unicode_in_string_list(tag, SINGLE); | return unicode_in_string_list(tag, SINGLE); | ||||
} | } | ||||
@@ -168,7 +173,8 @@ int is_single(PyObject *tag) | |||||
/* | /* | ||||
Return whether or not the given tag must exist without a close tag. | Return whether or not the given tag must exist without a close tag. | ||||
*/ | */ | ||||
int is_single_only(PyObject *tag) | |||||
int | |||||
is_single_only(PyObject *tag) | |||||
{ | { | ||||
return unicode_in_string_list(tag, SINGLE_ONLY); | return unicode_in_string_list(tag, SINGLE_ONLY); | ||||
} | } | ||||
@@ -176,10 +182,12 @@ int is_single_only(PyObject *tag) | |||||
/* | /* | ||||
Return whether the given scheme is valid for external links. | Return whether the given scheme is valid for external links. | ||||
*/ | */ | ||||
int is_scheme(PyObject *scheme, int slashes) | |||||
int | |||||
is_scheme(PyObject *scheme, int slashes) | |||||
{ | { | ||||
if (slashes) | |||||
if (slashes) { | |||||
return unicode_in_string_list(scheme, URI_SCHEMES); | return unicode_in_string_list(scheme, URI_SCHEMES); | ||||
else | |||||
} else { | |||||
return unicode_in_string_list(scheme, URI_SCHEMES_AUTHORITY_OPTIONAL); | return unicode_in_string_list(scheme, URI_SCHEMES_AUTHORITY_OPTIONAL); | ||||
} | |||||
} | } |
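
The slashes flag selects the list: a scheme followed by "://" must appear in URI_SCHEMES, while a bare "scheme:" link is checked against the authority-optional list. A small illustrative sketch:

    PyObject *scheme = PyUnicode_FromString("geo");
    if (scheme) {
        int ok = is_scheme(scheme, 0);   /* 1: "geo:..." needs no "//" */
        Py_DECREF(scheme);
    }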
@@ -28,12 +28,11 @@ SOFTWARE. | |||||
/* Functions */ | /* Functions */ | ||||
int is_parsable(PyObject*); | |||||
int is_single(PyObject*); | |||||
int is_single_only(PyObject*); | |||||
int is_scheme(PyObject*, int); | |||||
int is_parsable(PyObject *); | |||||
int is_single(PyObject *); | |||||
int is_single_only(PyObject *); | |||||
int is_scheme(PyObject *, int); | |||||
/* Macros */ | /* Macros */ | ||||
#define GET_HTML_TAG(markup) \ | |||||
(markup == ':' ? "dd" : markup == ';' ? "dt" : "li") | |||||
#define GET_HTML_TAG(markup) (markup == ':' ? "dd" : markup == ';' ? "dt" : "li") |
@@ -26,13 +26,14 @@ SOFTWARE. | |||||
/* | /* | ||||
Initialize a new TagData object. | Initialize a new TagData object. | ||||
*/ | */ | ||||
TagData* TagData_new(TokenizerInput* text) | |||||
TagData * | |||||
TagData_new(TokenizerInput *text) | |||||
{ | { | ||||
#define ALLOC_BUFFER(name) \ | |||||
name = Textbuffer_new(text); \ | |||||
if (!name) { \ | |||||
TagData_dealloc(self); \ | |||||
return NULL; \ | |||||
#define ALLOC_BUFFER(name) \ | |||||
name = Textbuffer_new(text); \ | |||||
if (!name) { \ | |||||
TagData_dealloc(self); \ | |||||
return NULL; \ | |||||
} | } | ||||
TagData *self = malloc(sizeof(TagData)); | TagData *self = malloc(sizeof(TagData)); | ||||
@@ -54,25 +55,30 @@ TagData* TagData_new(TokenizerInput* text) | |||||
/* | /* | ||||
Deallocate the given TagData object. | Deallocate the given TagData object. | ||||
*/ | */ | ||||
void TagData_dealloc(TagData* self) | |||||
void | |||||
TagData_dealloc(TagData *self) | |||||
{ | { | ||||
if (self->pad_first) | |||||
if (self->pad_first) { | |||||
Textbuffer_dealloc(self->pad_first); | Textbuffer_dealloc(self->pad_first); | ||||
if (self->pad_before_eq) | |||||
} | |||||
if (self->pad_before_eq) { | |||||
Textbuffer_dealloc(self->pad_before_eq); | Textbuffer_dealloc(self->pad_before_eq); | ||||
if (self->pad_after_eq) | |||||
} | |||||
if (self->pad_after_eq) { | |||||
Textbuffer_dealloc(self->pad_after_eq); | Textbuffer_dealloc(self->pad_after_eq); | ||||
} | |||||
free(self); | free(self); | ||||
} | } | ||||
/* | /* | ||||
Clear the internal buffers of the given TagData object. | Clear the internal buffers of the given TagData object. | ||||
*/ | */ | ||||
int TagData_reset_buffers(TagData* self) | |||||
int | |||||
TagData_reset_buffers(TagData *self) | |||||
{ | { | ||||
if (Textbuffer_reset(self->pad_first) || | |||||
Textbuffer_reset(self->pad_before_eq) || | |||||
Textbuffer_reset(self->pad_after_eq)) | |||||
if (Textbuffer_reset(self->pad_first) || Textbuffer_reset(self->pad_before_eq) || | |||||
Textbuffer_reset(self->pad_after_eq)) { | |||||
return -1; | return -1; | ||||
} | |||||
return 0; | return 0; | ||||
} | } |
@@ -29,15 +29,15 @@ SOFTWARE. | |||||
typedef struct { | typedef struct { | ||||
uint64_t context; | uint64_t context; | ||||
Textbuffer* pad_first; | |||||
Textbuffer* pad_before_eq; | |||||
Textbuffer* pad_after_eq; | |||||
Textbuffer *pad_first; | |||||
Textbuffer *pad_before_eq; | |||||
Textbuffer *pad_after_eq; | |||||
Py_UCS4 quoter; | Py_UCS4 quoter; | ||||
Py_ssize_t reset; | Py_ssize_t reset; | ||||
} TagData; | } TagData; | ||||
/* Functions */ | /* Functions */ | ||||
TagData* TagData_new(TokenizerInput*); | |||||
void TagData_dealloc(TagData*); | |||||
int TagData_reset_buffers(TagData*); | |||||
TagData *TagData_new(TokenizerInput *); | |||||
void TagData_dealloc(TagData *); | |||||
int TagData_reset_buffers(TagData *); |
@@ -23,20 +23,22 @@ SOFTWARE. | |||||
#include "textbuffer.h" | #include "textbuffer.h" | ||||
#define INITIAL_CAPACITY 32 | #define INITIAL_CAPACITY 32 | ||||
#define RESIZE_FACTOR 2 | |||||
#define CONCAT_EXTRA 32 | |||||
#define RESIZE_FACTOR 2 | |||||
#define CONCAT_EXTRA 32 | |||||
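
These constants encode an amortized-doubling policy: a buffer starts at INITIAL_CAPACITY and multiplies by RESIZE_FACTOR whenever a write overflows it, so appending N codepoints costs only O(log N) reallocations and O(N) total copying; writing 1000 codepoints, for instance, resizes five times (32, 64, 128, 256, 512, 1024). CONCAT_EXTRA adds slack on concatenation so a run of back-to-back concats does not resize every time.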
/* | /* | ||||
Internal allocation function for textbuffers. | Internal allocation function for textbuffers. | ||||
*/ | */ | ||||
static int internal_alloc(Textbuffer* self, Py_UCS4 maxchar) | |||||
static int | |||||
internal_alloc(Textbuffer *self, Py_UCS4 maxchar) | |||||
{ | { | ||||
self->capacity = INITIAL_CAPACITY; | self->capacity = INITIAL_CAPACITY; | ||||
self->length = 0; | self->length = 0; | ||||
self->object = PyUnicode_New(self->capacity, maxchar); | self->object = PyUnicode_New(self->capacity, maxchar); | ||||
if (!self->object) | |||||
if (!self->object) { | |||||
return -1; | return -1; | ||||
} | |||||
self->kind = PyUnicode_KIND(self->object); | self->kind = PyUnicode_KIND(self->object); | ||||
self->data = PyUnicode_DATA(self->object); | self->data = PyUnicode_DATA(self->object); | ||||
@@ -46,7 +48,8 @@ static int internal_alloc(Textbuffer* self, Py_UCS4 maxchar) | |||||
/* | /* | ||||
Internal deallocation function for textbuffers. | Internal deallocation function for textbuffers. | ||||
*/ | */ | ||||
static void internal_dealloc(Textbuffer* self) | |||||
static void | |||||
internal_dealloc(Textbuffer *self) | |||||
{ | { | ||||
Py_DECREF(self->object); | Py_DECREF(self->object); | ||||
} | } | ||||
@@ -54,14 +57,16 @@ static void internal_dealloc(Textbuffer* self) | |||||
/* | /* | ||||
Internal resize function. | Internal resize function. | ||||
*/ | */ | ||||
static int internal_resize(Textbuffer* self, Py_ssize_t new_cap) | |||||
static int | |||||
internal_resize(Textbuffer *self, Py_ssize_t new_cap) | |||||
{ | { | ||||
PyObject *newobj; | PyObject *newobj; | ||||
void *newdata; | void *newdata; | ||||
newobj = PyUnicode_New(new_cap, PyUnicode_MAX_CHAR_VALUE(self->object)); | newobj = PyUnicode_New(new_cap, PyUnicode_MAX_CHAR_VALUE(self->object)); | ||||
if (!newobj) | |||||
if (!newobj) { | |||||
return -1; | return -1; | ||||
} | |||||
newdata = PyUnicode_DATA(newobj); | newdata = PyUnicode_DATA(newobj); | ||||
memcpy(newdata, self->data, self->length * self->kind); | memcpy(newdata, self->data, self->length * self->kind); | ||||
Py_DECREF(self->object); | Py_DECREF(self->object); | ||||
@@ -75,22 +80,25 @@ static int internal_resize(Textbuffer* self, Py_ssize_t new_cap) | |||||
/* | /* | ||||
Create a new textbuffer object. | Create a new textbuffer object. | ||||
*/ | */ | ||||
Textbuffer* Textbuffer_new(TokenizerInput* text) | |||||
Textbuffer * | |||||
Textbuffer_new(TokenizerInput *text) | |||||
{ | { | ||||
Textbuffer* self = malloc(sizeof(Textbuffer)); | |||||
Textbuffer *self = malloc(sizeof(Textbuffer)); | |||||
Py_UCS4 maxchar = 0; | Py_UCS4 maxchar = 0; | ||||
maxchar = PyUnicode_MAX_CHAR_VALUE(text->object); | maxchar = PyUnicode_MAX_CHAR_VALUE(text->object); | ||||
if (!self) | |||||
if (!self) { | |||||
goto fail_nomem; | goto fail_nomem; | ||||
if (internal_alloc(self, maxchar) < 0) | |||||
} | |||||
if (internal_alloc(self, maxchar) < 0) { | |||||
goto fail_dealloc; | goto fail_dealloc; | ||||
} | |||||
return self; | return self; | ||||
fail_dealloc: | |||||
fail_dealloc: | |||||
free(self); | free(self); | ||||
fail_nomem: | |||||
fail_nomem: | |||||
PyErr_NoMemory(); | PyErr_NoMemory(); | ||||
return NULL; | return NULL; | ||||
} | } | ||||
@@ -98,7 +106,8 @@ Textbuffer* Textbuffer_new(TokenizerInput* text) | |||||
/* | /* | ||||
Deallocate the given textbuffer. | Deallocate the given textbuffer. | ||||
*/ | */ | ||||
void Textbuffer_dealloc(Textbuffer* self) | |||||
void | |||||
Textbuffer_dealloc(Textbuffer *self) | |||||
{ | { | ||||
internal_dealloc(self); | internal_dealloc(self); | ||||
free(self); | free(self); | ||||
@@ -107,26 +116,30 @@ void Textbuffer_dealloc(Textbuffer* self) | |||||
/* | /* | ||||
Reset a textbuffer to its initial, empty state. | Reset a textbuffer to its initial, empty state. | ||||
*/ | */ | ||||
int Textbuffer_reset(Textbuffer* self) | |||||
int | |||||
Textbuffer_reset(Textbuffer *self) | |||||
{ | { | ||||
Py_UCS4 maxchar = 0; | Py_UCS4 maxchar = 0; | ||||
maxchar = PyUnicode_MAX_CHAR_VALUE(self->object); | maxchar = PyUnicode_MAX_CHAR_VALUE(self->object); | ||||
internal_dealloc(self); | internal_dealloc(self); | ||||
if (internal_alloc(self, maxchar)) | |||||
if (internal_alloc(self, maxchar)) { | |||||
return -1; | return -1; | ||||
} | |||||
return 0; | return 0; | ||||
} | } | ||||
/* | /* | ||||
Write a Unicode codepoint to the given textbuffer. | Write a Unicode codepoint to the given textbuffer. | ||||
*/ | */ | ||||
int Textbuffer_write(Textbuffer* self, Py_UCS4 code) | |||||
int | |||||
Textbuffer_write(Textbuffer *self, Py_UCS4 code) | |||||
{ | { | ||||
if (self->length >= self->capacity) { | if (self->length >= self->capacity) { | ||||
if (internal_resize(self, self->capacity * RESIZE_FACTOR) < 0) | |||||
if (internal_resize(self, self->capacity * RESIZE_FACTOR) < 0) { | |||||
return -1; | return -1; | ||||
} | |||||
} | } | ||||
PyUnicode_WRITE(self->kind, self->data, self->length++, code); | PyUnicode_WRITE(self->kind, self->data, self->length++, code); | ||||
@@ -139,7 +152,8 @@ int Textbuffer_write(Textbuffer* self, Py_UCS4 code) | |||||
This function does not check for bounds. | This function does not check for bounds. | ||||
*/ | */ | ||||
Py_UCS4 Textbuffer_read(Textbuffer* self, Py_ssize_t index) | |||||
Py_UCS4 | |||||
Textbuffer_read(Textbuffer *self, Py_ssize_t index) | |||||
{ | { | ||||
return PyUnicode_READ(self->kind, self->data, index); | return PyUnicode_READ(self->kind, self->data, index); | ||||
} | } | ||||
@@ -147,7 +161,8 @@ Py_UCS4 Textbuffer_read(Textbuffer* self, Py_ssize_t index) | |||||
/* | /* | ||||
Return the contents of the textbuffer as a Python Unicode object. | Return the contents of the textbuffer as a Python Unicode object. | ||||
*/ | */ | ||||
PyObject* Textbuffer_render(Textbuffer* self) | |||||
PyObject * | |||||
Textbuffer_render(Textbuffer *self) | |||||
{ | { | ||||
return PyUnicode_FromKindAndData(self->kind, self->data, self->length); | return PyUnicode_FromKindAndData(self->kind, self->data, self->length); | ||||
} | } | ||||
@@ -155,17 +170,20 @@ PyObject* Textbuffer_render(Textbuffer* self) | |||||
/* | /* | ||||
Concatenate the 'other' textbuffer onto the end of the given textbuffer. | Concatenate the 'other' textbuffer onto the end of the given textbuffer. | ||||
*/ | */ | ||||
int Textbuffer_concat(Textbuffer* self, Textbuffer* other) | |||||
int | |||||
Textbuffer_concat(Textbuffer *self, Textbuffer *other) | |||||
{ | { | ||||
Py_ssize_t newlen = self->length + other->length; | Py_ssize_t newlen = self->length + other->length; | ||||
if (newlen > self->capacity) { | if (newlen > self->capacity) { | ||||
if (internal_resize(self, newlen + CONCAT_EXTRA) < 0) | |||||
if (internal_resize(self, newlen + CONCAT_EXTRA) < 0) { | |||||
return -1; | return -1; | ||||
} | |||||
} | } | ||||
assert(self->kind == other->kind); | assert(self->kind == other->kind); | ||||
memcpy(((Py_UCS1*) self->data) + self->kind * self->length, other->data, | |||||
memcpy(((Py_UCS1 *) self->data) + self->kind * self->length, | |||||
other->data, | |||||
other->length * other->kind); | other->length * other->kind); | ||||
self->length = newlen; | self->length = newlen; | ||||
@@ -175,15 +193,16 @@ int Textbuffer_concat(Textbuffer* self, Textbuffer* other) | |||||
/* | /* | ||||
Reverse the contents of the given textbuffer. | Reverse the contents of the given textbuffer. | ||||
*/ | */ | ||||
void Textbuffer_reverse(Textbuffer* self) | |||||
void | |||||
Textbuffer_reverse(Textbuffer *self) | |||||
{ | { | ||||
Py_ssize_t i, end = self->length - 1; | Py_ssize_t i, end = self->length - 1; | ||||
Py_UCS4 tmp; | Py_UCS4 tmp; | ||||
for (i = 0; i < self->length / 2; i++) { | for (i = 0; i < self->length / 2; i++) { | ||||
tmp = PyUnicode_READ(self->kind, self->data, i); | tmp = PyUnicode_READ(self->kind, self->data, i); | ||||
PyUnicode_WRITE(self->kind, self->data, i, | |||||
PyUnicode_READ(self->kind, self->data, end - i)); | |||||
PyUnicode_WRITE( | |||||
self->kind, self->data, i, PyUnicode_READ(self->kind, self->data, end - i)); | |||||
PyUnicode_WRITE(self->kind, self->data, end - i, tmp); | PyUnicode_WRITE(self->kind, self->data, end - i, tmp); | ||||
} | } | ||||
} | } |
@@ -26,11 +26,11 @@ SOFTWARE. | |||||
/* Functions */ | /* Functions */ | ||||
Textbuffer* Textbuffer_new(TokenizerInput*); | |||||
void Textbuffer_dealloc(Textbuffer*); | |||||
int Textbuffer_reset(Textbuffer*); | |||||
int Textbuffer_write(Textbuffer*, Py_UCS4); | |||||
Py_UCS4 Textbuffer_read(Textbuffer*, Py_ssize_t); | |||||
PyObject* Textbuffer_render(Textbuffer*); | |||||
int Textbuffer_concat(Textbuffer*, Textbuffer*); | |||||
void Textbuffer_reverse(Textbuffer*); | |||||
Textbuffer *Textbuffer_new(TokenizerInput *); | |||||
void Textbuffer_dealloc(Textbuffer *); | |||||
int Textbuffer_reset(Textbuffer *); | |||||
int Textbuffer_write(Textbuffer *, Py_UCS4); | |||||
Py_UCS4 Textbuffer_read(Textbuffer *, Py_ssize_t); | |||||
PyObject *Textbuffer_render(Textbuffer *); | |||||
int Textbuffer_concat(Textbuffer *, Textbuffer *); | |||||
void Textbuffer_reverse(Textbuffer *); |
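
Taken together, this is a growable string builder over CPython's compact unicode representation, with the buffer's kind fixed by the input's widest character. A minimal round-trip sketch inside a PyObject *-returning helper, assuming an already-loaded TokenizerInput named text (error checks on the writes elided):

    Textbuffer *buf = Textbuffer_new(&text);
    if (!buf) {
        return NULL;   /* PyErr_NoMemory() was already set */
    }
    Textbuffer_write(buf, 'h');
    Textbuffer_write(buf, 'i');
    PyObject *out = Textbuffer_render(buf);   /* new reference to "hi" */
    Textbuffer_dealloc(buf);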
@@ -25,11 +25,12 @@ SOFTWARE. | |||||
#include "common.h" | #include "common.h" | ||||
static const Py_UCS4 MARKERS[] = { | static const Py_UCS4 MARKERS[] = { | ||||
'{', '}', '[', ']', '<', '>', '|', '=', '&', '\'', '#', '*', ';', ':', '/', | |||||
'-', '!', '\n', '\0'}; | |||||
'{', '}', '[', ']', '<', '>', '|', '=', '&', '\'', | |||||
'#', '*', ';', ':', '/', '-', '!', '\n', '\0', | |||||
}; | |||||
#define NUM_MARKERS 19 | #define NUM_MARKERS 19 | ||||
/* Functions */ | /* Functions */ | ||||
PyObject* Tokenizer_parse(Tokenizer*, uint64_t, int); | |||||
PyObject *Tokenizer_parse(Tokenizer *, uint64_t, int); |
@@ -27,9 +27,10 @@ SOFTWARE. | |||||
/* | /* | ||||
Add a new token stack, context, and textbuffer to the list. | Add a new token stack, context, and textbuffer to the list. | ||||
*/ | */ | ||||
int Tokenizer_push(Tokenizer* self, uint64_t context) | |||||
int | |||||
Tokenizer_push(Tokenizer *self, uint64_t context) | |||||
{ | { | ||||
Stack* top = malloc(sizeof(Stack)); | |||||
Stack *top = malloc(sizeof(Stack)); | |||||
if (!top) { | if (!top) { | ||||
PyErr_NoMemory(); | PyErr_NoMemory(); | ||||
@@ -38,8 +39,9 @@ int Tokenizer_push(Tokenizer* self, uint64_t context) | |||||
top->stack = PyList_New(0); | top->stack = PyList_New(0); | ||||
top->context = context; | top->context = context; | ||||
top->textbuffer = Textbuffer_new(&self->text); | top->textbuffer = Textbuffer_new(&self->text); | ||||
if (!top->textbuffer) | |||||
if (!top->textbuffer) { | |||||
return -1; | return -1; | ||||
} | |||||
top->ident.head = self->head; | top->ident.head = self->head; | ||||
top->ident.context = context; | top->ident.context = context; | ||||
top->next = self->topstack; | top->next = self->topstack; | ||||
@@ -51,16 +53,19 @@ int Tokenizer_push(Tokenizer* self, uint64_t context) | |||||
/* | /* | ||||
Push the textbuffer onto the stack as a Text node and clear it. | Push the textbuffer onto the stack as a Text node and clear it. | ||||
*/ | */ | ||||
int Tokenizer_push_textbuffer(Tokenizer* self) | |||||
int | |||||
Tokenizer_push_textbuffer(Tokenizer *self) | |||||
{ | { | ||||
PyObject *text, *kwargs, *token; | PyObject *text, *kwargs, *token; | ||||
Textbuffer* buffer = self->topstack->textbuffer; | |||||
Textbuffer *buffer = self->topstack->textbuffer; | |||||
if (buffer->length == 0) | |||||
if (buffer->length == 0) { | |||||
return 0; | return 0; | ||||
} | |||||
text = Textbuffer_render(buffer); | text = Textbuffer_render(buffer); | ||||
if (!text) | |||||
if (!text) { | |||||
return -1; | return -1; | ||||
} | |||||
kwargs = PyDict_New(); | kwargs = PyDict_New(); | ||||
if (!kwargs) { | if (!kwargs) { | ||||
Py_DECREF(text); | Py_DECREF(text); | ||||
@@ -70,24 +75,27 @@ int Tokenizer_push_textbuffer(Tokenizer* self) | |||||
Py_DECREF(text); | Py_DECREF(text); | ||||
token = PyObject_Call(Text, NOARGS, kwargs); | token = PyObject_Call(Text, NOARGS, kwargs); | ||||
Py_DECREF(kwargs); | Py_DECREF(kwargs); | ||||
if (!token) | |||||
if (!token) { | |||||
return -1; | return -1; | ||||
} | |||||
if (PyList_Append(self->topstack->stack, token)) { | if (PyList_Append(self->topstack->stack, token)) { | ||||
Py_DECREF(token); | Py_DECREF(token); | ||||
return -1; | return -1; | ||||
} | } | ||||
Py_DECREF(token); | Py_DECREF(token); | ||||
if (Textbuffer_reset(buffer)) | |||||
if (Textbuffer_reset(buffer)) { | |||||
return -1; | return -1; | ||||
} | |||||
return 0; | return 0; | ||||
} | } | ||||
/* | /* | ||||
Pop and deallocate the top token stack/context/textbuffer. | Pop and deallocate the top token stack/context/textbuffer. | ||||
*/ | */ | ||||
void Tokenizer_delete_top_of_stack(Tokenizer* self) | |||||
void | |||||
Tokenizer_delete_top_of_stack(Tokenizer *self) | |||||
{ | { | ||||
Stack* top = self->topstack; | |||||
Stack *top = self->topstack; | |||||
Py_DECREF(top->stack); | Py_DECREF(top->stack); | ||||
Textbuffer_dealloc(top->textbuffer); | Textbuffer_dealloc(top->textbuffer); | ||||
@@ -99,12 +107,14 @@ void Tokenizer_delete_top_of_stack(Tokenizer* self) | |||||
/* | /* | ||||
Pop the current stack/context/textbuffer, returning the stack. | Pop the current stack/context/textbuffer, returning the stack. | ||||
*/ | */ | ||||
PyObject* Tokenizer_pop(Tokenizer* self) | |||||
PyObject * | |||||
Tokenizer_pop(Tokenizer *self) | |||||
{ | { | ||||
PyObject* stack; | |||||
PyObject *stack; | |||||
if (Tokenizer_push_textbuffer(self)) | |||||
if (Tokenizer_push_textbuffer(self)) { | |||||
return NULL; | return NULL; | ||||
} | |||||
stack = self->topstack->stack; | stack = self->topstack->stack; | ||||
Py_INCREF(stack); | Py_INCREF(stack); | ||||
Tokenizer_delete_top_of_stack(self); | Tokenizer_delete_top_of_stack(self); | ||||
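
Push and pop bracket every speculative sub-parse: a routine pushes a fresh stack under the new context, emits tokens and text into it, then pops the finished list (or fails the route). A hypothetical caller:

    if (Tokenizer_push(self, LC_TEMPLATE_NAME)) {
        return NULL;
    }
    /* ... emit tokens/text for the nested construct ... */
    PyObject *tokens = Tokenizer_pop(self);   /* new reference, NULL on error */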
@@ -115,13 +125,15 @@ PyObject* Tokenizer_pop(Tokenizer* self) | |||||
Pop the current stack/context/textbuffer, returning the stack. We will also | Pop the current stack/context/textbuffer, returning the stack. We will also | ||||
replace the underlying stack's context with the current stack's. | replace the underlying stack's context with the current stack's. | ||||
*/ | */ | ||||
PyObject* Tokenizer_pop_keeping_context(Tokenizer* self) | |||||
PyObject * | |||||
Tokenizer_pop_keeping_context(Tokenizer *self) | |||||
{ | { | ||||
PyObject* stack; | |||||
PyObject *stack; | |||||
uint64_t context; | uint64_t context; | ||||
if (Tokenizer_push_textbuffer(self)) | |||||
if (Tokenizer_push_textbuffer(self)) { | |||||
return NULL; | return NULL; | ||||
} | |||||
stack = self->topstack->stack; | stack = self->topstack->stack; | ||||
Py_INCREF(stack); | Py_INCREF(stack); | ||||
context = self->topstack->context; | context = self->topstack->context; | ||||
@@ -133,16 +145,18 @@ PyObject* Tokenizer_pop_keeping_context(Tokenizer* self) | |||||
/* | /* | ||||
Compare two route_tree_nodes that are in their avl_tree_node forms. | Compare two route_tree_nodes that are in their avl_tree_node forms. | ||||
*/ | */ | ||||
static int compare_nodes( | |||||
const struct avl_tree_node* na, const struct avl_tree_node* nb) | |||||
static int | |||||
compare_nodes(const struct avl_tree_node *na, const struct avl_tree_node *nb) | |||||
{ | { | ||||
route_tree_node *a = avl_tree_entry(na, route_tree_node, node); | route_tree_node *a = avl_tree_entry(na, route_tree_node, node); | ||||
route_tree_node *b = avl_tree_entry(nb, route_tree_node, node); | route_tree_node *b = avl_tree_entry(nb, route_tree_node, node); | ||||
if (a->id.head < b->id.head) | |||||
if (a->id.head < b->id.head) { | |||||
return -1; | return -1; | ||||
if (a->id.head > b->id.head) | |||||
} | |||||
if (a->id.head > b->id.head) { | |||||
return 1; | return 1; | ||||
} | |||||
return (a->id.context > b->id.context) - (a->id.context < b->id.context); | return (a->id.context > b->id.context) - (a->id.context < b->id.context); | ||||
} | } | ||||
@@ -152,13 +166,15 @@ static int compare_nodes( | |||||
This will be noticed when calling Tokenizer_check_route with the same head | This will be noticed when calling Tokenizer_check_route with the same head | ||||
and context, and the route will be failed immediately. | and context, and the route will be failed immediately. | ||||
*/ | */ | ||||
void Tokenizer_memoize_bad_route(Tokenizer *self) | |||||
void | |||||
Tokenizer_memoize_bad_route(Tokenizer *self) | |||||
{ | { | ||||
route_tree_node *node = malloc(sizeof(route_tree_node)); | route_tree_node *node = malloc(sizeof(route_tree_node)); | ||||
if (node) { | if (node) { | ||||
node->id = self->topstack->ident; | node->id = self->topstack->ident; | ||||
if (avl_tree_insert(&self->bad_routes, &node->node, compare_nodes)) | |||||
if (avl_tree_insert(&self->bad_routes, &node->node, compare_nodes)) { | |||||
free(node); | free(node); | ||||
} | |||||
} | } | ||||
} | } | ||||
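
The memo tree is what keeps backtracking tractable: every stack is keyed by (head, context) at push time, and each dead end records its key, so a later attempt at the same position and context is refused up front instead of re-parsing the whole span. Sketch of the fast-fail path at a hypothetical call site:

    if (Tokenizer_check_route(self, LC_TEMPLATE_NAME)) {
        /* This (head, context) pair already dead-ended once;
           BAD_ROUTE is set without redoing any work. */
        return NULL;
    }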
@@ -168,10 +184,11 @@ void Tokenizer_memoize_bad_route(Tokenizer *self) | |||||
ident of the failed stack so future parsing attempts down this route can be | ident of the failed stack so future parsing attempts down this route can be | ||||
stopped early. | stopped early. | ||||
*/ | */ | ||||
void* Tokenizer_fail_route(Tokenizer* self) | |||||
void * | |||||
Tokenizer_fail_route(Tokenizer *self) | |||||
{ | { | ||||
uint64_t context = self->topstack->context; | uint64_t context = self->topstack->context; | ||||
PyObject* stack; | |||||
PyObject *stack; | |||||
Tokenizer_memoize_bad_route(self); | Tokenizer_memoize_bad_route(self); | ||||
stack = Tokenizer_pop(self); | stack = Tokenizer_pop(self); | ||||
@@ -193,10 +210,11 @@ void* Tokenizer_fail_route(Tokenizer* self) | |||||
but this would introduce too much overhead in the C tokenizer due to the need | but this would introduce too much overhead in the C tokenizer due to the need | ||||
to check for a bad route after every call to Tokenizer_push.) | to check for a bad route after every call to Tokenizer_push.) | ||||
*/ | */ | ||||
int Tokenizer_check_route(Tokenizer* self, uint64_t context) | |||||
int | |||||
Tokenizer_check_route(Tokenizer *self, uint64_t context) | |||||
{ | { | ||||
StackIdent ident = {self->head, context}; | StackIdent ident = {self->head, context}; | ||||
struct avl_tree_node *node = (struct avl_tree_node*) (&ident + 1); | |||||
struct avl_tree_node *node = (struct avl_tree_node *) (&ident + 1); | |||||
if (avl_tree_lookup_node(self->bad_routes, node, compare_nodes)) { | if (avl_tree_lookup_node(self->bad_routes, node, compare_nodes)) { | ||||
FAIL_ROUTE(context); | FAIL_ROUTE(context); | ||||
@@ -209,7 +227,8 @@ int Tokenizer_check_route(Tokenizer* self, uint64_t context) | |||||
Free the tokenizer's bad route cache tree. Intended to be called by the | Free the tokenizer's bad route cache tree. Intended to be called by the | ||||
main tokenizer function after parsing is finished. | main tokenizer function after parsing is finished. | ||||
*/ | */ | ||||
void Tokenizer_free_bad_route_tree(Tokenizer *self) | |||||
void | |||||
Tokenizer_free_bad_route_tree(Tokenizer *self) | |||||
{ | { | ||||
struct avl_tree_node *cur = avl_tree_first_in_postorder(self->bad_routes); | struct avl_tree_node *cur = avl_tree_first_in_postorder(self->bad_routes); | ||||
struct avl_tree_node *parent; | struct avl_tree_node *parent; | ||||
@@ -225,17 +244,20 @@ void Tokenizer_free_bad_route_tree(Tokenizer *self) | |||||
/* | /* | ||||
Write a token to the current token stack. | Write a token to the current token stack. | ||||
*/ | */ | ||||
int Tokenizer_emit_token(Tokenizer* self, PyObject* token, int first) | |||||
int | |||||
Tokenizer_emit_token(Tokenizer *self, PyObject *token, int first) | |||||
{ | { | ||||
PyObject* instance; | |||||
PyObject *instance; | |||||
if (Tokenizer_push_textbuffer(self)) | |||||
if (Tokenizer_push_textbuffer(self)) { | |||||
return -1; | return -1; | ||||
} | |||||
instance = PyObject_CallObject(token, NULL); | instance = PyObject_CallObject(token, NULL); | ||||
if (!instance) | |||||
if (!instance) { | |||||
return -1; | return -1; | ||||
if (first ? PyList_Insert(self->topstack->stack, 0, instance) : | |||||
PyList_Append(self->topstack->stack, instance)) { | |||||
} | |||||
if (first ? PyList_Insert(self->topstack->stack, 0, instance) | |||||
: PyList_Append(self->topstack->stack, instance)) { | |||||
Py_DECREF(instance); | Py_DECREF(instance); | ||||
return -1; | return -1; | ||||
} | } | ||||
@@ -247,10 +269,13 @@ int Tokenizer_emit_token(Tokenizer* self, PyObject* token, int first) | |||||
Write a token to the current token stack, with kwargs. Steals a reference | Write a token to the current token stack, with kwargs. Steals a reference | ||||
to kwargs. | to kwargs. | ||||
*/ | */ | ||||
int Tokenizer_emit_token_kwargs(Tokenizer* self, PyObject* token, | |||||
PyObject* kwargs, int first) | |||||
int | |||||
Tokenizer_emit_token_kwargs(Tokenizer *self, | |||||
PyObject *token, | |||||
PyObject *kwargs, | |||||
int first) | |||||
{ | { | ||||
PyObject* instance; | |||||
PyObject *instance; | |||||
if (Tokenizer_push_textbuffer(self)) { | if (Tokenizer_push_textbuffer(self)) { | ||||
Py_DECREF(kwargs); | Py_DECREF(kwargs); | ||||
@@ -261,8 +286,8 @@ int Tokenizer_emit_token_kwargs(Tokenizer* self, PyObject* token, | |||||
Py_DECREF(kwargs); | Py_DECREF(kwargs); | ||||
return -1; | return -1; | ||||
} | } | ||||
if (first ? PyList_Insert(self->topstack->stack, 0, instance): | |||||
PyList_Append(self->topstack->stack, instance)) { | |||||
if (first ? PyList_Insert(self->topstack->stack, 0, instance) | |||||
: PyList_Append(self->topstack->stack, instance)) { | |||||
Py_DECREF(instance); | Py_DECREF(instance); | ||||
Py_DECREF(kwargs); | Py_DECREF(kwargs); | ||||
return -1; | return -1; | ||||
@@ -275,7 +300,8 @@ int Tokenizer_emit_token_kwargs(Tokenizer* self, PyObject* token, | |||||
/* | /* | ||||
Write a Unicode codepoint to the current textbuffer. | Write a Unicode codepoint to the current textbuffer. | ||||
*/ | */ | ||||
int Tokenizer_emit_char(Tokenizer* self, Py_UCS4 code) | |||||
int | |||||
Tokenizer_emit_char(Tokenizer *self, Py_UCS4 code) | |||||
{ | { | ||||
return Textbuffer_write(self->topstack->textbuffer, code); | return Textbuffer_write(self->topstack->textbuffer, code); | ||||
} | } | ||||
@@ -283,13 +309,15 @@ int Tokenizer_emit_char(Tokenizer* self, Py_UCS4 code) | |||||
/* | /* | ||||
Write a string of text to the current textbuffer. | Write a string of text to the current textbuffer. | ||||
*/ | */ | ||||
int Tokenizer_emit_text(Tokenizer* self, const char* text) | |||||
int | |||||
Tokenizer_emit_text(Tokenizer *self, const char *text) | |||||
{ | { | ||||
int i = 0; | int i = 0; | ||||
while (text[i]) { | while (text[i]) { | ||||
if (Tokenizer_emit_char(self, text[i])) | |||||
if (Tokenizer_emit_char(self, text[i])) { | |||||
return -1; | return -1; | ||||
} | |||||
i++; | i++; | ||||
} | } | ||||
return 0; | return 0; | ||||
@@ -299,7 +327,8 @@ int Tokenizer_emit_text(Tokenizer* self, const char* text) | |||||
Write the contents of another textbuffer to the current textbuffer, | Write the contents of another textbuffer to the current textbuffer, | ||||
deallocating it in the process. | deallocating it in the process. | ||||
*/ | */ | ||||
int Tokenizer_emit_textbuffer(Tokenizer* self, Textbuffer* buffer) | |||||
int | |||||
Tokenizer_emit_textbuffer(Tokenizer *self, Textbuffer *buffer) | |||||
{ | { | ||||
int retval = Textbuffer_concat(self->topstack->textbuffer, buffer); | int retval = Textbuffer_concat(self->topstack->textbuffer, buffer); | ||||
Textbuffer_dealloc(buffer); | Textbuffer_dealloc(buffer); | ||||
@@ -309,55 +338,63 @@ int Tokenizer_emit_textbuffer(Tokenizer* self, Textbuffer* buffer) | |||||
/* | /* | ||||
Write a series of tokens to the current stack at once. | Write a series of tokens to the current stack at once. | ||||
*/ | */ | ||||
int Tokenizer_emit_all(Tokenizer* self, PyObject* tokenlist) | |||||
int | |||||
Tokenizer_emit_all(Tokenizer *self, PyObject *tokenlist) | |||||
{ | { | ||||
int pushed = 0; | int pushed = 0; | ||||
PyObject *stack, *token, *left, *right, *text; | PyObject *stack, *token, *left, *right, *text; | ||||
Textbuffer* buffer; | |||||
Textbuffer *buffer; | |||||
Py_ssize_t size; | Py_ssize_t size; | ||||
if (PyList_GET_SIZE(tokenlist) > 0) { | if (PyList_GET_SIZE(tokenlist) > 0) { | ||||
token = PyList_GET_ITEM(tokenlist, 0); | token = PyList_GET_ITEM(tokenlist, 0); | ||||
switch (PyObject_IsInstance(token, Text)) { | switch (PyObject_IsInstance(token, Text)) { | ||||
case 0: | |||||
case 0: | |||||
break; | |||||
case 1: { | |||||
pushed = 1; | |||||
buffer = self->topstack->textbuffer; | |||||
if (buffer->length == 0) { | |||||
break; | break; | ||||
case 1: { | |||||
pushed = 1; | |||||
buffer = self->topstack->textbuffer; | |||||
if (buffer->length == 0) | |||||
break; | |||||
left = Textbuffer_render(buffer); | |||||
if (!left) | |||||
return -1; | |||||
right = PyObject_GetAttrString(token, "text"); | |||||
if (!right) | |||||
return -1; | |||||
text = PyUnicode_Concat(left, right); | |||||
Py_DECREF(left); | |||||
Py_DECREF(right); | |||||
if (!text) | |||||
return -1; | |||||
if (PyObject_SetAttrString(token, "text", text)) { | |||||
Py_DECREF(text); | |||||
return -1; | |||||
} | |||||
} | |||||
left = Textbuffer_render(buffer); | |||||
if (!left) { | |||||
return -1; | |||||
} | |||||
right = PyObject_GetAttrString(token, "text"); | |||||
if (!right) { | |||||
return -1; | |||||
} | |||||
text = PyUnicode_Concat(left, right); | |||||
Py_DECREF(left); | |||||
Py_DECREF(right); | |||||
if (!text) { | |||||
return -1; | |||||
} | |||||
if (PyObject_SetAttrString(token, "text", text)) { | |||||
Py_DECREF(text); | Py_DECREF(text); | ||||
if (Textbuffer_reset(buffer)) | |||||
return -1; | |||||
break; | |||||
return -1; | |||||
} | } | ||||
case -1: | |||||
Py_DECREF(text); | |||||
if (Textbuffer_reset(buffer)) { | |||||
return -1; | return -1; | ||||
} | |||||
break; | |||||
} | |||||
case -1: | |||||
return -1; | |||||
} | } | ||||
} | } | ||||
if (!pushed) { | if (!pushed) { | ||||
if (Tokenizer_push_textbuffer(self)) | |||||
if (Tokenizer_push_textbuffer(self)) { | |||||
return -1; | return -1; | ||||
} | |||||
} | } | ||||
stack = self->topstack->stack; | stack = self->topstack->stack; | ||||
size = PyList_GET_SIZE(stack); | size = PyList_GET_SIZE(stack); | ||||
if (PyList_SetSlice(stack, size, size, tokenlist)) | |||||
if (PyList_SetSlice(stack, size, size, tokenlist)) { | |||||
return -1; | return -1; | ||||
} | |||||
return 0; | return 0; | ||||
} | } | ||||
@@ -365,9 +402,10 @@ int Tokenizer_emit_all(Tokenizer* self, PyObject* tokenlist) | |||||
Pop the current stack, write text, and then write the stack. 'text' is a | Pop the current stack, write text, and then write the stack. 'text' is a | ||||
NULL-terminated array of chars. | NULL-terminated array of chars. | ||||
*/ | */ | ||||
int Tokenizer_emit_text_then_stack(Tokenizer* self, const char* text) | |||||
int | |||||
Tokenizer_emit_text_then_stack(Tokenizer *self, const char *text) | |||||
{ | { | ||||
PyObject* stack = Tokenizer_pop(self); | |||||
PyObject *stack = Tokenizer_pop(self); | |||||
if (Tokenizer_emit_text(self, text)) { | if (Tokenizer_emit_text(self, text)) { | ||||
Py_DECREF(stack); | Py_DECREF(stack); | ||||
@@ -389,7 +427,8 @@ int Tokenizer_emit_text_then_stack(Tokenizer* self, const char* text) | |||||
/* | /* | ||||
Internal function to read the codepoint at the given index from the input. | Internal function to read the codepoint at the given index from the input. | ||||
*/ | */ | ||||
static Py_UCS4 read_codepoint(TokenizerInput* text, Py_ssize_t index) | |||||
static Py_UCS4 | |||||
read_codepoint(TokenizerInput *text, Py_ssize_t index) | |||||
{ | { | ||||
return PyUnicode_READ(text->kind, text->data, index); | return PyUnicode_READ(text->kind, text->data, index); | ||||
} | } | ||||
@@ -397,24 +436,28 @@ static Py_UCS4 read_codepoint(TokenizerInput* text, Py_ssize_t index) | |||||
/* | /* | ||||
Read the value at a relative point in the wikicode, forwards. | Read the value at a relative point in the wikicode, forwards. | ||||
*/ | */ | ||||
Py_UCS4 Tokenizer_read(Tokenizer* self, Py_ssize_t delta) | |||||
Py_UCS4 | |||||
Tokenizer_read(Tokenizer *self, Py_ssize_t delta) | |||||
{ | { | ||||
Py_ssize_t index = self->head + delta; | Py_ssize_t index = self->head + delta; | ||||
if (index >= self->text.length) | |||||
if (index >= self->text.length) { | |||||
return '\0'; | return '\0'; | ||||
} | |||||
return read_codepoint(&self->text, index); | return read_codepoint(&self->text, index); | ||||
} | } | ||||
/* | /* | ||||
Read the value at a relative point in the wikicode, backwards. | Read the value at a relative point in the wikicode, backwards. | ||||
*/ | */ | ||||
Py_UCS4 Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta) | |||||
Py_UCS4 | |||||
Tokenizer_read_backwards(Tokenizer *self, Py_ssize_t delta) | |||||
{ | { | ||||
Py_ssize_t index; | Py_ssize_t index; | ||||
if (delta > self->head) | |||||
if (delta > self->head) { | |||||
return '\0'; | return '\0'; | ||||
} | |||||
index = self->head - delta; | index = self->head - delta; | ||||
return read_codepoint(&self->text, index); | return read_codepoint(&self->text, index); | ||||
} | } |
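
Both readers clamp at the ends of the input and return '\0' instead of reading out of bounds, so scanning code can peek freely without its own checks:

    Py_UCS4 cur = Tokenizer_read(self, 0);             /* codepoint at head */
    Py_UCS4 next = Tokenizer_read(self, 1);            /* '\0' once past the end */
    Py_UCS4 prev = Tokenizer_read_backwards(self, 1);  /* '\0' before the start */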
@@ -26,41 +26,38 @@ SOFTWARE. | |||||
/* Functions */ | /* Functions */ | ||||
int Tokenizer_push(Tokenizer*, uint64_t); | |||||
int Tokenizer_push_textbuffer(Tokenizer*); | |||||
void Tokenizer_delete_top_of_stack(Tokenizer*); | |||||
PyObject* Tokenizer_pop(Tokenizer*); | |||||
PyObject* Tokenizer_pop_keeping_context(Tokenizer*); | |||||
void Tokenizer_memoize_bad_route(Tokenizer*); | |||||
void* Tokenizer_fail_route(Tokenizer*); | |||||
int Tokenizer_check_route(Tokenizer*, uint64_t); | |||||
void Tokenizer_free_bad_route_tree(Tokenizer*); | |||||
int Tokenizer_emit_token(Tokenizer*, PyObject*, int); | |||||
int Tokenizer_emit_token_kwargs(Tokenizer*, PyObject*, PyObject*, int); | |||||
int Tokenizer_emit_char(Tokenizer*, Py_UCS4); | |||||
int Tokenizer_emit_text(Tokenizer*, const char*); | |||||
int Tokenizer_emit_textbuffer(Tokenizer*, Textbuffer*); | |||||
int Tokenizer_emit_all(Tokenizer*, PyObject*); | |||||
int Tokenizer_emit_text_then_stack(Tokenizer*, const char*); | |||||
Py_UCS4 Tokenizer_read(Tokenizer*, Py_ssize_t); | |||||
Py_UCS4 Tokenizer_read_backwards(Tokenizer*, Py_ssize_t); | |||||
int Tokenizer_push(Tokenizer *, uint64_t); | |||||
int Tokenizer_push_textbuffer(Tokenizer *); | |||||
void Tokenizer_delete_top_of_stack(Tokenizer *); | |||||
PyObject *Tokenizer_pop(Tokenizer *); | |||||
PyObject *Tokenizer_pop_keeping_context(Tokenizer *); | |||||
void Tokenizer_memoize_bad_route(Tokenizer *); | |||||
void *Tokenizer_fail_route(Tokenizer *); | |||||
int Tokenizer_check_route(Tokenizer *, uint64_t); | |||||
void Tokenizer_free_bad_route_tree(Tokenizer *); | |||||
int Tokenizer_emit_token(Tokenizer *, PyObject *, int); | |||||
int Tokenizer_emit_token_kwargs(Tokenizer *, PyObject *, PyObject *, int); | |||||
int Tokenizer_emit_char(Tokenizer *, Py_UCS4); | |||||
int Tokenizer_emit_text(Tokenizer *, const char *); | |||||
int Tokenizer_emit_textbuffer(Tokenizer *, Textbuffer *); | |||||
int Tokenizer_emit_all(Tokenizer *, PyObject *); | |||||
int Tokenizer_emit_text_then_stack(Tokenizer *, const char *); | |||||
Py_UCS4 Tokenizer_read(Tokenizer *, Py_ssize_t); | |||||
Py_UCS4 Tokenizer_read_backwards(Tokenizer *, Py_ssize_t); | |||||
/* Macros */ | /* Macros */ | ||||
#define MAX_DEPTH 40 | |||||
#define Tokenizer_CAN_RECURSE(self) \ | |||||
(self->depth < MAX_DEPTH) | |||||
#define Tokenizer_IS_CURRENT_STACK(self, id) \ | |||||
(self->topstack->ident.head == (id).head && \ | |||||
#define MAX_DEPTH 40 | |||||
#define Tokenizer_CAN_RECURSE(self) (self->depth < MAX_DEPTH) | |||||
#define Tokenizer_IS_CURRENT_STACK(self, id) \ | |||||
(self->topstack->ident.head == (id).head && \ | |||||
self->topstack->ident.context == (id).context) | self->topstack->ident.context == (id).context) | ||||
#define Tokenizer_emit(self, token) \ | |||||
Tokenizer_emit_token(self, token, 0) | |||||
#define Tokenizer_emit_first(self, token) \ | |||||
Tokenizer_emit_token(self, token, 1) | |||||
#define Tokenizer_emit_kwargs(self, token, kwargs) \ | |||||
#define Tokenizer_emit(self, token) Tokenizer_emit_token(self, token, 0) | |||||
#define Tokenizer_emit_first(self, token) Tokenizer_emit_token(self, token, 1) | |||||
#define Tokenizer_emit_kwargs(self, token, kwargs) \ | |||||
Tokenizer_emit_token_kwargs(self, token, kwargs, 0) | Tokenizer_emit_token_kwargs(self, token, kwargs, 0) | ||||
#define Tokenizer_emit_first_kwargs(self, token, kwargs) \ | |||||
#define Tokenizer_emit_first_kwargs(self, token, kwargs) \ | |||||
Tokenizer_emit_token_kwargs(self, token, kwargs, 1) | Tokenizer_emit_token_kwargs(self, token, kwargs, 1) |
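
MAX_DEPTH caps nesting so pathological input cannot exhaust the C stack; parse routines consult it before recursing and treat an over-deep construct as a failed route. A hypothetical guard in a PyObject *-returning routine:

    if (!Tokenizer_CAN_RECURSE(self)) {
        return Tokenizer_fail_route(self);   /* fails the route, yields NULL */
    }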
@@ -30,12 +30,12 @@ SOFTWARE. | |||||
int route_state; | int route_state; | ||||
uint64_t route_context; | uint64_t route_context; | ||||
char** entitydefs; | |||||
char **entitydefs; | |||||
PyObject* NOARGS; | |||||
PyObject* definitions; | |||||
PyObject *NOARGS; | |||||
PyObject *definitions; | |||||
static PyObject* ParserError; | |||||
static PyObject *ParserError; | |||||
/* Forward declarations */ | /* Forward declarations */ | ||||
@@ -44,17 +44,18 @@ static int load_exceptions(void); | |||||
/* | /* | ||||
Create a new tokenizer object. | Create a new tokenizer object. | ||||
*/ | */ | ||||
static PyObject* | |||||
Tokenizer_new(PyTypeObject* type, PyObject* args, PyObject* kwds) | |||||
static PyObject * | |||||
Tokenizer_new(PyTypeObject *type, PyObject *args, PyObject *kwds) | |||||
{ | { | ||||
Tokenizer* self = (Tokenizer*) type->tp_alloc(type, 0); | |||||
return (PyObject*) self; | |||||
Tokenizer *self = (Tokenizer *) type->tp_alloc(type, 0); | |||||
return (PyObject *) self; | |||||
} | } | ||||
/* | /* | ||||
Deallocate the given tokenizer's text field. | Deallocate the given tokenizer's text field. | ||||
*/ | */ | ||||
static void dealloc_tokenizer_text(TokenizerInput* text) | |||||
static void | |||||
dealloc_tokenizer_text(TokenizerInput *text) | |||||
{ | { | ||||
Py_XDECREF(text->object); | Py_XDECREF(text->object); | ||||
} | } | ||||
@@ -62,7 +63,8 @@ static void dealloc_tokenizer_text(TokenizerInput* text) | |||||
/* | /* | ||||
Deallocate the given tokenizer object. | Deallocate the given tokenizer object. | ||||
*/ | */ | ||||
static void Tokenizer_dealloc(Tokenizer* self) | |||||
static void | |||||
Tokenizer_dealloc(Tokenizer *self) | |||||
{ | { | ||||
Stack *this = self->topstack, *next; | Stack *this = self->topstack, *next; | ||||
dealloc_tokenizer_text(&self->text); | dealloc_tokenizer_text(&self->text); | ||||
@@ -74,13 +76,14 @@ static void Tokenizer_dealloc(Tokenizer* self) | |||||
free(this); | free(this); | ||||
this = next; | this = next; | ||||
} | } | ||||
Py_TYPE(self)->tp_free((PyObject*) self); | |||||
Py_TYPE(self)->tp_free((PyObject *) self); | |||||
} | } | ||||
/* | /* | ||||
Initialize a new tokenizer instance's text field. | Initialize a new tokenizer instance's text field. | ||||
*/ | */ | ||||
static void init_tokenizer_text(TokenizerInput* text) | |||||
static void | |||||
init_tokenizer_text(TokenizerInput *text) | |||||
{ | { | ||||
text->object = Py_None; | text->object = Py_None; | ||||
Py_INCREF(Py_None); | Py_INCREF(Py_None); | ||||
@@ -92,12 +95,14 @@ static void init_tokenizer_text(TokenizerInput* text) | |||||
/* | /* | ||||
Initialize a new tokenizer instance by setting instance attributes. | Initialize a new tokenizer instance by setting instance attributes. | ||||
*/ | */ | ||||
static int Tokenizer_init(Tokenizer* self, PyObject* args, PyObject* kwds) | |||||
static int | |||||
Tokenizer_init(Tokenizer *self, PyObject *args, PyObject *kwds) | |||||
{ | { | ||||
static char* kwlist[] = {NULL}; | |||||
static char *kwlist[] = {NULL}; | |||||
if (!PyArg_ParseTupleAndKeywords(args, kwds, "", kwlist)) | |||||
if (!PyArg_ParseTupleAndKeywords(args, kwds, "", kwlist)) { | |||||
return -1; | return -1; | ||||
} | |||||
init_tokenizer_text(&self->text); | init_tokenizer_text(&self->text); | ||||
self->topstack = NULL; | self->topstack = NULL; | ||||
self->head = self->global = self->depth = 0; | self->head = self->global = self->depth = 0; | ||||
@@ -110,13 +115,15 @@ static int Tokenizer_init(Tokenizer* self, PyObject* args, PyObject* kwds) | |||||
/* | /* | ||||
Load input text into the tokenizer. | Load input text into the tokenizer. | ||||
*/ | */ | ||||
static int load_tokenizer_text(TokenizerInput* text, PyObject *input) | |||||
static int | |||||
load_tokenizer_text(TokenizerInput *text, PyObject *input) | |||||
{ | { | ||||
dealloc_tokenizer_text(text); | dealloc_tokenizer_text(text); | ||||
text->object = input; | text->object = input; | ||||
if (PyUnicode_READY(input) < 0) | |||||
if (PyUnicode_READY(input) < 0) { | |||||
return -1; | return -1; | ||||
} | |||||
text->kind = PyUnicode_KIND(input); | text->kind = PyUnicode_KIND(input); | ||||
text->data = PyUnicode_DATA(input); | text->data = PyUnicode_DATA(input); | ||||
text->length = PyUnicode_GET_LENGTH(input); | text->length = PyUnicode_GET_LENGTH(input); | ||||
@@ -126,7 +133,8 @@ static int load_tokenizer_text(TokenizerInput* text, PyObject *input) | |||||
/* | /* | ||||
Build a list of tokens from a string of wikicode and return it. | Build a list of tokens from a string of wikicode and return it. | ||||
*/ | */ | ||||
static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args) | |||||
static PyObject * | |||||
Tokenizer_tokenize(Tokenizer *self, PyObject *args) | |||||
{ | { | ||||
PyObject *input, *tokens; | PyObject *input, *tokens; | ||||
unsigned long long context = 0; | unsigned long long context = 0; | ||||
@@ -134,22 +142,25 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args) | |||||
if (PyArg_ParseTuple(args, "U|Kp", &input, &context, &skip_style_tags)) { | if (PyArg_ParseTuple(args, "U|Kp", &input, &context, &skip_style_tags)) { | ||||
Py_INCREF(input); | Py_INCREF(input); | ||||
if (load_tokenizer_text(&self->text, input)) | |||||
if (load_tokenizer_text(&self->text, input)) { | |||||
return NULL; | return NULL; | ||||
} | |||||
else { | |||||
} | |||||
} else { | |||||
const char *encoded; | const char *encoded; | ||||
Py_ssize_t size; | Py_ssize_t size; | ||||
/* Failed to parse a Unicode object; try a string instead. */ | /* Failed to parse a Unicode object; try a string instead. */ | ||||
PyErr_Clear(); | PyErr_Clear(); | ||||
if (!PyArg_ParseTuple(args, "s#|Kp", &encoded, &size, &context, | |||||
&skip_style_tags)) | |||||
if (!PyArg_ParseTuple( | |||||
args, "s#|Kp", &encoded, &size, &context, &skip_style_tags)) { | |||||
return NULL; | return NULL; | ||||
if (!(input = PyUnicode_FromStringAndSize(encoded, size))) | |||||
} | |||||
if (!(input = PyUnicode_FromStringAndSize(encoded, size))) { | |||||
return NULL; | return NULL; | ||||
if (load_tokenizer_text(&self->text, input)) | |||||
} | |||||
if (load_tokenizer_text(&self->text, input)) { | |||||
return NULL; | return NULL; | ||||
} | |||||
} | } | ||||
self->head = self->global = self->depth = 0; | self->head = self->global = self->depth = 0; | ||||
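
For reference, the two PyArg_ParseTuple formats above decode as follows: "U" takes a ready str object, "K" an unsigned long long (the context bitfield), "p" a boolean, and "s#" a text buffer plus Py_ssize_t length (hence the PY_SSIZE_T_CLEAN define), used as the fallback when the first parse fails; the "|" marks everything after it as optional.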
@@ -162,73 +173,83 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args) | |||||
if (!tokens || self->topstack) { | if (!tokens || self->topstack) { | ||||
Py_XDECREF(tokens); | Py_XDECREF(tokens); | ||||
if (PyErr_Occurred()) | |||||
if (PyErr_Occurred()) { | |||||
return NULL; | return NULL; | ||||
if (!ParserError && load_exceptions() < 0) | |||||
} | |||||
if (!ParserError && load_exceptions() < 0) { | |||||
return NULL; | return NULL; | ||||
} | |||||
if (BAD_ROUTE) { | if (BAD_ROUTE) { | ||||
RESET_ROUTE(); | RESET_ROUTE(); | ||||
PyErr_SetString(ParserError, "C tokenizer exited with BAD_ROUTE"); | PyErr_SetString(ParserError, "C tokenizer exited with BAD_ROUTE"); | ||||
} | |||||
else if (self->topstack) | |||||
} else if (self->topstack) { | |||||
PyErr_SetString(ParserError, | PyErr_SetString(ParserError, | ||||
"C tokenizer exited with non-empty token stack"); | "C tokenizer exited with non-empty token stack"); | ||||
else | |||||
} else { | |||||
PyErr_SetString(ParserError, "C tokenizer exited unexpectedly"); | PyErr_SetString(ParserError, "C tokenizer exited unexpectedly"); | ||||
} | |||||
return NULL; | return NULL; | ||||
} | } | ||||
return tokens; | return tokens; | ||||
} | } | ||||
static int load_entities(void) | |||||
static int | |||||
load_entities(void) | |||||
{ | { | ||||
PyObject *tempmod, *defmap, *deflist; | PyObject *tempmod, *defmap, *deflist; | ||||
unsigned numdefs, i; | unsigned numdefs, i; | ||||
PyObject *string; | PyObject *string; | ||||
tempmod = PyImport_ImportModule("html.entities"); | tempmod = PyImport_ImportModule("html.entities"); | ||||
if (!tempmod) | |||||
if (!tempmod) { | |||||
return -1; | return -1; | ||||
} | |||||
defmap = PyObject_GetAttrString(tempmod, "entitydefs"); | defmap = PyObject_GetAttrString(tempmod, "entitydefs"); | ||||
if (!defmap) | |||||
if (!defmap) { | |||||
return -1; | return -1; | ||||
} | |||||
Py_DECREF(tempmod); | Py_DECREF(tempmod); | ||||
deflist = PyDict_Keys(defmap); | deflist = PyDict_Keys(defmap); | ||||
if (!deflist) | |||||
if (!deflist) { | |||||
return -1; | return -1; | ||||
} | |||||
Py_DECREF(defmap); | Py_DECREF(defmap); | ||||
numdefs = (unsigned) PyList_GET_SIZE(deflist); | numdefs = (unsigned) PyList_GET_SIZE(deflist); | ||||
entitydefs = calloc(numdefs + 1, sizeof(char*)); | |||||
if (!entitydefs) | |||||
entitydefs = calloc(numdefs + 1, sizeof(char *)); | |||||
if (!entitydefs) { | |||||
return -1; | return -1; | ||||
} | |||||
for (i = 0; i < numdefs; i++) { | for (i = 0; i < numdefs; i++) { | ||||
string = PyUnicode_AsASCIIString(PyList_GET_ITEM(deflist, i)); | string = PyUnicode_AsASCIIString(PyList_GET_ITEM(deflist, i)); | ||||
if (!string) | |||||
if (!string) { | |||||
return -1; | return -1; | ||||
} | |||||
entitydefs[i] = PyBytes_AsString(string); | entitydefs[i] = PyBytes_AsString(string); | ||||
if (!entitydefs[i]) | |||||
if (!entitydefs[i]) { | |||||
return -1; | return -1; | ||||
} | |||||
} | } | ||||
Py_DECREF(deflist); | Py_DECREF(deflist); | ||||
return 0; | return 0; | ||||
} | } | ||||
static int load_tokens(void) | |||||
static int | |||||
load_tokens(void) | |||||
{ | { | ||||
PyObject *tempmod, *tokens, | |||||
*globals = PyEval_GetGlobals(), | |||||
*locals = PyEval_GetLocals(), | |||||
*fromlist = PyList_New(1), | |||||
*modname = PyUnicode_FromString("tokens"); | |||||
PyObject *tempmod, *tokens; | |||||
PyObject *globals = PyEval_GetGlobals(), *locals = PyEval_GetLocals(), | |||||
*fromlist = PyList_New(1), *modname = PyUnicode_FromString("tokens"); | |||||
char *name = "mwparserfromhell.parser"; | char *name = "mwparserfromhell.parser"; | ||||
if (!fromlist || !modname) | |||||
if (!fromlist || !modname) { | |||||
return -1; | return -1; | ||||
} | |||||
PyList_SET_ITEM(fromlist, 0, modname); | PyList_SET_ITEM(fromlist, 0, modname); | ||||
tempmod = PyImport_ImportModuleLevel(name, globals, locals, fromlist, 0); | tempmod = PyImport_ImportModuleLevel(name, globals, locals, fromlist, 0); | ||||
Py_DECREF(fromlist); | Py_DECREF(fromlist); | ||||
if (!tempmod) | |||||
if (!tempmod) { | |||||
return -1; | return -1; | ||||
} | |||||
tokens = PyObject_GetAttrString(tempmod, "tokens"); | tokens = PyObject_GetAttrString(tempmod, "tokens"); | ||||
Py_DECREF(tempmod); | Py_DECREF(tempmod); | ||||
load_tokens_from_module(tokens); | load_tokens_from_module(tokens); | ||||
@@ -236,43 +257,45 @@ static int load_tokens(void) | |||||
return 0; | return 0; | ||||
} | } | ||||
static int load_defs(void) | |||||
static int | |||||
load_defs(void) | |||||
{ | { | ||||
PyObject *tempmod, | |||||
*globals = PyEval_GetGlobals(), | |||||
*locals = PyEval_GetLocals(), | |||||
*fromlist = PyList_New(1), | |||||
*modname = PyUnicode_FromString("definitions"); | |||||
PyObject *tempmod; | |||||
PyObject *globals = PyEval_GetGlobals(), *locals = PyEval_GetLocals(), | |||||
*fromlist = PyList_New(1), *modname = PyUnicode_FromString("definitions"); | |||||
char *name = "mwparserfromhell"; | char *name = "mwparserfromhell"; | ||||
if (!fromlist || !modname) | |||||
if (!fromlist || !modname) { | |||||
return -1; | return -1; | ||||
} | |||||
PyList_SET_ITEM(fromlist, 0, modname); | PyList_SET_ITEM(fromlist, 0, modname); | ||||
tempmod = PyImport_ImportModuleLevel(name, globals, locals, fromlist, 0); | tempmod = PyImport_ImportModuleLevel(name, globals, locals, fromlist, 0); | ||||
Py_DECREF(fromlist); | Py_DECREF(fromlist); | ||||
if (!tempmod) | |||||
if (!tempmod) { | |||||
return -1; | return -1; | ||||
} | |||||
definitions = PyObject_GetAttrString(tempmod, "definitions"); | definitions = PyObject_GetAttrString(tempmod, "definitions"); | ||||
Py_DECREF(tempmod); | Py_DECREF(tempmod); | ||||
return 0; | return 0; | ||||
} | } | ||||
static int load_exceptions(void) | |||||
static int | |||||
load_exceptions(void) | |||||
{ | { | ||||
PyObject *tempmod, *parsermod, | |||||
*globals = PyEval_GetGlobals(), | |||||
*locals = PyEval_GetLocals(), | |||||
*fromlist = PyList_New(1), | |||||
*modname = PyUnicode_FromString("parser"); | |||||
PyObject *tempmod, *parsermod; | |||||
PyObject *globals = PyEval_GetGlobals(), *locals = PyEval_GetLocals(), | |||||
*fromlist = PyList_New(1), *modname = PyUnicode_FromString("parser"); | |||||
char *name = "mwparserfromhell"; | char *name = "mwparserfromhell"; | ||||
if (!fromlist || !modname) | |||||
if (!fromlist || !modname) { | |||||
return -1; | return -1; | ||||
} | |||||
PyList_SET_ITEM(fromlist, 0, modname); | PyList_SET_ITEM(fromlist, 0, modname); | ||||
tempmod = PyImport_ImportModuleLevel(name, globals, locals, fromlist, 0); | tempmod = PyImport_ImportModuleLevel(name, globals, locals, fromlist, 0); | ||||
Py_DECREF(fromlist); | Py_DECREF(fromlist); | ||||
if (!tempmod) | |||||
if (!tempmod) { | |||||
return -1; | return -1; | ||||
} | |||||
parsermod = PyObject_GetAttrString(tempmod, "parser"); | parsermod = PyObject_GetAttrString(tempmod, "parser"); | ||||
Py_DECREF(tempmod); | Py_DECREF(tempmod); | ||||
ParserError = PyObject_GetAttrString(parsermod, "ParserError"); | ParserError = PyObject_GetAttrString(parsermod, "ParserError"); | ||||
@@ -280,22 +303,26 @@ static int load_exceptions(void) | |||||
return 0; | return 0; | ||||
} | } | ||||
PyMODINIT_FUNC PyInit__tokenizer(void) | |||||
PyMODINIT_FUNC | |||||
PyInit__tokenizer(void) | |||||
{ | { | ||||
PyObject *module; | PyObject *module; | ||||
TokenizerType.tp_new = PyType_GenericNew; | TokenizerType.tp_new = PyType_GenericNew; | ||||
if (PyType_Ready(&TokenizerType) < 0) | |||||
if (PyType_Ready(&TokenizerType) < 0) { | |||||
return NULL; | return NULL; | ||||
} | |||||
module = PyModule_Create(&module_def); | module = PyModule_Create(&module_def); | ||||
if (!module) | |||||
if (!module) { | |||||
return NULL; | return NULL; | ||||
} | |||||
Py_INCREF(&TokenizerType); | Py_INCREF(&TokenizerType); | ||||
PyModule_AddObject(module, "CTokenizer", (PyObject*) &TokenizerType); | |||||
PyModule_AddObject(module, "CTokenizer", (PyObject *) &TokenizerType); | |||||
Py_INCREF(Py_True); | Py_INCREF(Py_True); | ||||
PyDict_SetItemString(TokenizerType.tp_dict, "USES_C", Py_True); | PyDict_SetItemString(TokenizerType.tp_dict, "USES_C", Py_True); | ||||
NOARGS = PyTuple_New(0); | NOARGS = PyTuple_New(0); | ||||
if (!NOARGS || load_entities() || load_tokens() || load_defs()) | |||||
if (!NOARGS || load_entities() || load_tokens() || load_defs()) { | |||||
return NULL; | return NULL; | ||||
} | |||||
return module; | return module; | ||||
} | } |
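For reference, here is how the reformatted entry point above is driven from Python. This is a minimal sketch, not part of the change: it assumes the C extension is built, that it is importable as mwparserfromhell.parser._tokenizer (matching the "_tokenizer" and "CTokenizer" names registered in PyInit__tokenizer()), and that arguments follow the "U|Kp" format string parsed by Tokenizer_tokenize().

    # Hypothetical usage sketch of the CTokenizer wrapper shown above.
    from mwparserfromhell.parser._tokenizer import CTokenizer

    tokenizer = CTokenizer()
    # "U|Kp": a str, then an optional context int and skip_style_tags flag.
    tokens = tokenizer.tokenize("{{foo|bar=baz}}")
    styled = tokenizer.tokenize("''italic''", 0, True)  # skip wiki style tags
    print(tokens)  # a flat list of Token objects (TemplateOpen(), Text(), ...)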
@@ -27,67 +27,76 @@ SOFTWARE.

 /* Functions */

-static PyObject* Tokenizer_new(PyTypeObject*, PyObject*, PyObject*);
-static void Tokenizer_dealloc(Tokenizer*);
-static int Tokenizer_init(Tokenizer*, PyObject*, PyObject*);
-static PyObject* Tokenizer_tokenize(Tokenizer*, PyObject*);
+static PyObject *Tokenizer_new(PyTypeObject *, PyObject *, PyObject *);
+static void Tokenizer_dealloc(Tokenizer *);
+static int Tokenizer_init(Tokenizer *, PyObject *, PyObject *);
+static PyObject *Tokenizer_tokenize(Tokenizer *, PyObject *);

 /* Structs */

 static PyMethodDef Tokenizer_methods[] = {
-    {"tokenize", (PyCFunction) Tokenizer_tokenize, METH_VARARGS,
-     "Build a list of tokens from a string of wikicode and return it."},
-    {NULL}
+    {
+        "tokenize",
+        (PyCFunction) Tokenizer_tokenize,
+        METH_VARARGS,
+        "Build a list of tokens from a string of wikicode and return it.",
+    },
+    {NULL},
 };

 static PyMemberDef Tokenizer_members[] = {
-    {NULL}
+    {NULL},
 };

 static PyTypeObject TokenizerType = {
-    PyVarObject_HEAD_INIT(NULL, 0)
-    "_tokenizer.CTokenizer",                                    /* tp_name */
-    sizeof(Tokenizer),                                          /* tp_basicsize */
-    0,                                                          /* tp_itemsize */
-    (destructor) Tokenizer_dealloc,                             /* tp_dealloc */
-    0,                                                          /* tp_print */
-    0,                                                          /* tp_getattr */
-    0,                                                          /* tp_setattr */
-    0,                                                          /* tp_compare */
-    0,                                                          /* tp_repr */
-    0,                                                          /* tp_as_number */
-    0,                                                          /* tp_as_sequence */
-    0,                                                          /* tp_as_mapping */
-    0,                                                          /* tp_hash */
-    0,                                                          /* tp_call */
-    0,                                                          /* tp_str */
-    0,                                                          /* tp_getattro */
-    0,                                                          /* tp_setattro */
-    0,                                                          /* tp_as_buffer */
-    Py_TPFLAGS_DEFAULT,                                         /* tp_flags */
-    "Creates a list of tokens from a string of wikicode.",      /* tp_doc */
-    0,                                                          /* tp_traverse */
-    0,                                                          /* tp_clear */
-    0,                                                          /* tp_richcompare */
-    0,                                                          /* tp_weaklistoffset */
-    0,                                                          /* tp_iter */
-    0,                                                          /* tp_iternext */
-    Tokenizer_methods,                                          /* tp_methods */
-    Tokenizer_members,                                          /* tp_members */
-    0,                                                          /* tp_getset */
-    0,                                                          /* tp_base */
-    0,                                                          /* tp_dict */
-    0,                                                          /* tp_descr_get */
-    0,                                                          /* tp_descr_set */
-    0,                                                          /* tp_dictoffset */
-    (initproc) Tokenizer_init,                                  /* tp_init */
-    0,                                                          /* tp_alloc */
-    Tokenizer_new,                                              /* tp_new */
+    PyVarObject_HEAD_INIT(NULL, 0)                         /* header */
+    "_tokenizer.CTokenizer",                               /* tp_name */
+    sizeof(Tokenizer),                                     /* tp_basicsize */
+    0,                                                     /* tp_itemsize */
+    (destructor) Tokenizer_dealloc,                        /* tp_dealloc */
+    0,                                                     /* tp_print */
+    0,                                                     /* tp_getattr */
+    0,                                                     /* tp_setattr */
+    0,                                                     /* tp_compare */
+    0,                                                     /* tp_repr */
+    0,                                                     /* tp_as_number */
+    0,                                                     /* tp_as_sequence */
+    0,                                                     /* tp_as_mapping */
+    0,                                                     /* tp_hash */
+    0,                                                     /* tp_call */
+    0,                                                     /* tp_str */
+    0,                                                     /* tp_getattro */
+    0,                                                     /* tp_setattro */
+    0,                                                     /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT,                                    /* tp_flags */
+    "Creates a list of tokens from a string of wikicode.", /* tp_doc */
+    0,                                                     /* tp_traverse */
+    0,                                                     /* tp_clear */
+    0,                                                     /* tp_richcompare */
+    0,                                                     /* tp_weaklistoffset */
+    0,                                                     /* tp_iter */
+    0,                                                     /* tp_iternext */
+    Tokenizer_methods,                                     /* tp_methods */
+    Tokenizer_members,                                     /* tp_members */
+    0,                                                     /* tp_getset */
+    0,                                                     /* tp_base */
+    0,                                                     /* tp_dict */
+    0,                                                     /* tp_descr_get */
+    0,                                                     /* tp_descr_set */
+    0,                                                     /* tp_dictoffset */
+    (initproc) Tokenizer_init,                             /* tp_init */
+    0,                                                     /* tp_alloc */
+    Tokenizer_new,                                         /* tp_new */
 };

 static PyModuleDef module_def = {
     PyModuleDef_HEAD_INIT,
     "_tokenizer",
     "Creates a list of tokens from a string of wikicode.",
-    -1, NULL, NULL, NULL, NULL, NULL
+    -1,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
 };
@@ -24,56 +24,55 @@ SOFTWARE.

 /* Globals */

-PyObject* Text;
-PyObject* TemplateOpen;
-PyObject* TemplateParamSeparator;
-PyObject* TemplateParamEquals;
-PyObject* TemplateClose;
-PyObject* ArgumentOpen;
-PyObject* ArgumentSeparator;
-PyObject* ArgumentClose;
-PyObject* WikilinkOpen;
-PyObject* WikilinkSeparator;
-PyObject* WikilinkClose;
-PyObject* ExternalLinkOpen;
-PyObject* ExternalLinkSeparator;
-PyObject* ExternalLinkClose;
-PyObject* HTMLEntityStart;
-PyObject* HTMLEntityNumeric;
-PyObject* HTMLEntityHex;
-PyObject* HTMLEntityEnd;
-PyObject* HeadingStart;
-PyObject* HeadingEnd;
-PyObject* CommentStart;
-PyObject* CommentEnd;
-PyObject* TagOpenOpen;
-PyObject* TagAttrStart;
-PyObject* TagAttrEquals;
-PyObject* TagAttrQuote;
-PyObject* TagCloseOpen;
-PyObject* TagCloseSelfclose;
-PyObject* TagOpenClose;
-PyObject* TagCloseClose;
+PyObject *Text;
+PyObject *TemplateOpen;
+PyObject *TemplateParamSeparator;
+PyObject *TemplateParamEquals;
+PyObject *TemplateClose;
+PyObject *ArgumentOpen;
+PyObject *ArgumentSeparator;
+PyObject *ArgumentClose;
+PyObject *WikilinkOpen;
+PyObject *WikilinkSeparator;
+PyObject *WikilinkClose;
+PyObject *ExternalLinkOpen;
+PyObject *ExternalLinkSeparator;
+PyObject *ExternalLinkClose;
+PyObject *HTMLEntityStart;
+PyObject *HTMLEntityNumeric;
+PyObject *HTMLEntityHex;
+PyObject *HTMLEntityEnd;
+PyObject *HeadingStart;
+PyObject *HeadingEnd;
+PyObject *CommentStart;
+PyObject *CommentEnd;
+PyObject *TagOpenOpen;
+PyObject *TagAttrStart;
+PyObject *TagAttrEquals;
+PyObject *TagAttrQuote;
+PyObject *TagCloseOpen;
+PyObject *TagCloseSelfclose;
+PyObject *TagOpenClose;
+PyObject *TagCloseClose;

 /*
     Load individual tokens into globals from the given Python module object.
 */
-void load_tokens_from_module(PyObject* module)
+void
+load_tokens_from_module(PyObject *module)
 {
     Text = PyObject_GetAttrString(module, "Text");

     TemplateOpen = PyObject_GetAttrString(module, "TemplateOpen");
-    TemplateParamSeparator = PyObject_GetAttrString(module,
-                                                    "TemplateParamSeparator");
-    TemplateParamEquals = PyObject_GetAttrString(module,
-                                                 "TemplateParamEquals");
+    TemplateParamSeparator = PyObject_GetAttrString(module, "TemplateParamSeparator");
+    TemplateParamEquals = PyObject_GetAttrString(module, "TemplateParamEquals");
     TemplateClose = PyObject_GetAttrString(module, "TemplateClose");

     ArgumentOpen = PyObject_GetAttrString(module, "ArgumentOpen");

@@ -85,8 +84,7 @@ void load_tokens_from_module(PyObject* module)
     WikilinkClose = PyObject_GetAttrString(module, "WikilinkClose");

     ExternalLinkOpen = PyObject_GetAttrString(module, "ExternalLinkOpen");
-    ExternalLinkSeparator = PyObject_GetAttrString(module,
-                                                   "ExternalLinkSeparator");
+    ExternalLinkSeparator = PyObject_GetAttrString(module, "ExternalLinkSeparator");
     ExternalLinkClose = PyObject_GetAttrString(module, "ExternalLinkClose");

     HTMLEntityStart = PyObject_GetAttrString(module, "HTMLEntityStart");

@@ -26,44 +26,44 @@ SOFTWARE.

 /* Token globals */

-extern PyObject* Text;
-extern PyObject* TemplateOpen;
-extern PyObject* TemplateParamSeparator;
-extern PyObject* TemplateParamEquals;
-extern PyObject* TemplateClose;
-extern PyObject* ArgumentOpen;
-extern PyObject* ArgumentSeparator;
-extern PyObject* ArgumentClose;
-extern PyObject* WikilinkOpen;
-extern PyObject* WikilinkSeparator;
-extern PyObject* WikilinkClose;
-extern PyObject* ExternalLinkOpen;
-extern PyObject* ExternalLinkSeparator;
-extern PyObject* ExternalLinkClose;
-extern PyObject* HTMLEntityStart;
-extern PyObject* HTMLEntityNumeric;
-extern PyObject* HTMLEntityHex;
-extern PyObject* HTMLEntityEnd;
-extern PyObject* HeadingStart;
-extern PyObject* HeadingEnd;
-extern PyObject* CommentStart;
-extern PyObject* CommentEnd;
-extern PyObject* TagOpenOpen;
-extern PyObject* TagAttrStart;
-extern PyObject* TagAttrEquals;
-extern PyObject* TagAttrQuote;
-extern PyObject* TagCloseOpen;
-extern PyObject* TagCloseSelfclose;
-extern PyObject* TagOpenClose;
-extern PyObject* TagCloseClose;
+extern PyObject *Text;
+extern PyObject *TemplateOpen;
+extern PyObject *TemplateParamSeparator;
+extern PyObject *TemplateParamEquals;
+extern PyObject *TemplateClose;
+extern PyObject *ArgumentOpen;
+extern PyObject *ArgumentSeparator;
+extern PyObject *ArgumentClose;
+extern PyObject *WikilinkOpen;
+extern PyObject *WikilinkSeparator;
+extern PyObject *WikilinkClose;
+extern PyObject *ExternalLinkOpen;
+extern PyObject *ExternalLinkSeparator;
+extern PyObject *ExternalLinkClose;
+extern PyObject *HTMLEntityStart;
+extern PyObject *HTMLEntityNumeric;
+extern PyObject *HTMLEntityHex;
+extern PyObject *HTMLEntityEnd;
+extern PyObject *HeadingStart;
+extern PyObject *HeadingEnd;
+extern PyObject *CommentStart;
+extern PyObject *CommentEnd;
+extern PyObject *TagOpenOpen;
+extern PyObject *TagAttrStart;
+extern PyObject *TagAttrEquals;
+extern PyObject *TagAttrQuote;
+extern PyObject *TagCloseOpen;
+extern PyObject *TagCloseSelfclose;
+extern PyObject *TagOpenClose;
+extern PyObject *TagCloseClose;

 /* Functions */

-void load_tokens_from_module(PyObject*);
+void load_tokens_from_module(PyObject *);
@@ -20,6 +20,7 @@

 __all__ = ["ParserError"]

+
 class ParserError(Exception):
     """Exception raised when an internal error occurs while parsing.

@@ -28,6 +29,7 @@ class ParserError(Exception):
     with an impossible internal state and is bailing out before other problems
     can happen. Its appearance indicates a bug.
     """
+
     def __init__(self, extra):
         msg = "This is a bug and should be reported. Info: {}.".format(extra)
         super().__init__(msg)
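A short sketch of how this exception surfaces downstream (assuming ParserError remains importable from mwparserfromhell.parser, which is the attribute the C module's load_exceptions() resolves):

    import mwparserfromhell
    from mwparserfromhell.parser import ParserError

    try:
        code = mwparserfromhell.parse("{{foo|bar}}")  # any input; errors are rare
    except ParserError as exc:
        # Message built by __init__ above:
        # "This is a bug and should be reported. Info: <extra>."
        print("tokenizer bug:", exc)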
@@ -24,11 +24,17 @@ import re

 from . import contexts, tokens
 from .errors import ParserError
-from ..definitions import (get_html_tag, is_parsable, is_single,
-                           is_single_only, is_scheme)
+from ..definitions import (
+    get_html_tag,
+    is_parsable,
+    is_single,
+    is_single_only,
+    is_scheme,
+)

 __all__ = ["Tokenizer"]

+
 class BadRoute(Exception):
     """Raised internally when the current tokenization route is invalid."""

@@ -39,14 +45,15 @@ class BadRoute(Exception):

 class _TagOpenData:
     """Stores data about an HTML open tag, like ``<ref name="foo">``."""
-    CX_NAME =        1 << 0
-    CX_ATTR_READY =  1 << 1
-    CX_ATTR_NAME =   1 << 2
-    CX_ATTR_VALUE =  1 << 3
-    CX_QUOTED =      1 << 4
-    CX_NOTE_SPACE =  1 << 5
+
+    CX_NAME = 1 << 0
+    CX_ATTR_READY = 1 << 1
+    CX_ATTR_NAME = 1 << 2
+    CX_ATTR_VALUE = 1 << 3
+    CX_QUOTED = 1 << 4
+    CX_NOTE_SPACE = 1 << 5
     CX_NOTE_EQUALS = 1 << 6
-    CX_NOTE_QUOTE =  1 << 7
+    CX_NOTE_QUOTE = 1 << 7

     def __init__(self):
         self.context = self.CX_NAME

@@ -57,11 +64,33 @@ class _TagOpenData:

 class Tokenizer:
     """Creates a list of tokens from a string of wikicode."""
+
     USES_C = False
     START = object()
     END = object()
-    MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "'", '"', "#", "*", ";",
-               ":", "/", "-", "!", "\n", START, END]
+    MARKERS = [
+        "{",
+        "}",
+        "[",
+        "]",
+        "<",
+        ">",
+        "|",
+        "=",
+        "&",
+        "'",
+        '"',
+        "#",
+        "*",
+        ";",
+        ":",
+        "/",
+        "-",
+        "!",
+        "\n",
+        START,
+        END,
+    ]
     URISCHEME = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-"
     MAX_DEPTH = 40
     regex = re.compile(r"([{}\[\]<>|=&'#*;:/\\\"\-!\n])", flags=re.IGNORECASE)

@@ -437,13 +466,15 @@ class Tokenizer:
         """Return whether the current head is the end of a URI."""
         # Built from _parse()'s end sentinels:
         after, ctx = self._read(2), self._context
-        return (this in (self.END, "\n", "[", "]", "<", ">", '"') or
-                " " in this or
-                this == nxt == "'" or
-                (this == "|" and ctx & contexts.TEMPLATE) or
-                (this == "=" and ctx & (contexts.TEMPLATE_PARAM_KEY | contexts.HEADING)) or
-                (this == nxt == "}" and ctx & contexts.TEMPLATE) or
-                (this == nxt == after == "}" and ctx & contexts.ARGUMENT))
+        return (
+            this in (self.END, "\n", "[", "]", "<", ">", '"')
+            or " " in this
+            or this == nxt == "'"
+            or (this == "|" and ctx & contexts.TEMPLATE)
+            or (this == "=" and ctx & (contexts.TEMPLATE_PARAM_KEY | contexts.HEADING))
+            or (this == nxt == "}" and ctx & contexts.TEMPLATE)
+            or (this == nxt == after == "}" and ctx & contexts.ARGUMENT)
+        )

     def _really_parse_external_link(self, brackets):
         """Really parse an external link."""

@@ -681,9 +712,13 @@
             self._emit_first(tokens.TagAttrQuote(char=data.quoter))
             self._emit_all(self._pop())
         buf = data.padding_buffer
-        self._emit_first(tokens.TagAttrStart(
-            pad_first=buf["first"], pad_before_eq=buf["before_eq"],
-            pad_after_eq=buf["after_eq"]))
+        self._emit_first(
+            tokens.TagAttrStart(
+                pad_first=buf["first"],
+                pad_before_eq=buf["before_eq"],
+                pad_after_eq=buf["after_eq"],
+            )
+        )
         self._emit_all(self._pop())
         for key in data.padding_buffer:
             data.padding_buffer[key] = ""

@@ -691,7 +726,9 @@
     def _handle_tag_space(self, data, text):
         """Handle whitespace (*text*) inside of an HTML open tag."""
         ctx = data.context
-        end_of_value = ctx & data.CX_ATTR_VALUE and not ctx & (data.CX_QUOTED | data.CX_NOTE_QUOTE)
+        end_of_value = ctx & data.CX_ATTR_VALUE and not ctx & (
+            data.CX_QUOTED | data.CX_NOTE_QUOTE
+        )
         if end_of_value or (ctx & data.CX_QUOTED and ctx & data.CX_NOTE_SPACE):
             self._push_tag_buffer(data)
             data.context = data.CX_ATTR_READY

@@ -792,8 +829,10 @@
         """Handle the ending of a closing tag (``</foo>``)."""
         strip = lambda tok: tok.text.rstrip().lower()
         closing = self._pop()
-        if len(closing) != 1 or (not isinstance(closing[0], tokens.Text) or
-                                 strip(closing[0]) != strip(self._stack[1])):
+        if len(closing) != 1 or (
+            not isinstance(closing[0], tokens.Text)
+            or strip(closing[0]) != strip(self._stack[1])
+        ):
             self._fail_route()
         self._emit_all(closing)
         self._emit(tokens.TagCloseClose())

@@ -808,8 +847,9 @@
                 self._fail_route()
             elif this == "<" and nxt == "/":
                 self._head += 3
-                if self._read() != ">" or (strip(self._read(-1)) !=
-                                           strip(self._stack[1].text)):
+                if self._read() != ">" or (
+                    strip(self._read(-1)) != strip(self._stack[1].text)
+                ):
                     self._head -= 1
                     self._emit_text("</")
                     continue

@@ -862,8 +902,10 @@
         self._emit(tokens.TagOpenOpen())
         while True:
             this, nxt = self._read(), self._read(1)
-            can_exit = (not data.context & (data.CX_QUOTED | data.CX_NAME) or
-                        data.context & data.CX_NOTE_SPACE)
+            can_exit = (
+                not data.context & (data.CX_QUOTED | data.CX_NAME)
+                or data.context & data.CX_NOTE_SPACE
+            )
             if this is self.END:
                 if self._context & contexts.TAG_ATTR:
                     if data.context & data.CX_QUOTED:

@@ -1079,16 +1121,25 @@
         else:
             self._emit_text("\n")

-    def _emit_table_tag(self, open_open_markup, tag, style, padding,
-                        close_open_markup, contents, open_close_markup):
+    def _emit_table_tag(
+        self,
+        open_open_markup,
+        tag,
+        style,
+        padding,
+        close_open_markup,
+        contents,
+        open_close_markup,
+    ):
         """Emit a table tag."""
         self._emit(tokens.TagOpenOpen(wiki_markup=open_open_markup))
         self._emit_text(tag)
         if style:
             self._emit_all(style)
         if close_open_markup:
-            self._emit(tokens.TagCloseOpen(wiki_markup=close_open_markup,
-                                           padding=padding))
+            self._emit(
+                tokens.TagCloseOpen(wiki_markup=close_open_markup, padding=padding)
+            )
         else:
             self._emit(tokens.TagCloseOpen(padding=padding))
         if contents:

@@ -1103,8 +1154,9 @@
         data.context = _TagOpenData.CX_ATTR_READY
         while True:
             this = self._read()
-            can_exit = (not data.context & data.CX_QUOTED or
-                        data.context & data.CX_NOTE_SPACE)
+            can_exit = (
+                not data.context & data.CX_QUOTED or data.context & data.CX_NOTE_SPACE
+            )
             if this == end_token and can_exit:
                 if data.context & (data.CX_ATTR_NAME | data.CX_ATTR_VALUE):
                     self._push_tag_buffer(data)

@@ -1187,30 +1239,34 @@
             self._head -= 1
             return

-        cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN |
-                           line_context | contexts.TABLE_CELL_STYLE)
+        cell = self._parse(
+            contexts.TABLE_OPEN
+            | contexts.TABLE_CELL_OPEN
+            | line_context
+            | contexts.TABLE_CELL_STYLE
+        )
         cell_context = self._context
         self._context = old_context
         reset_for_style = cell_context & contexts.TABLE_CELL_STYLE
         if reset_for_style:
             self._head = reset
-            self._push(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN |
-                       line_context)
+            self._push(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context)
             padding = self._handle_table_style("|")
             style = self._pop()
             # Don't parse the style separator:
             self._head += 1
-            cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN |
-                               line_context)
+            cell = self._parse(
+                contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context
+            )
             cell_context = self._context
             self._context = old_context

         close_open_markup = "|" if reset_for_style else None
-        self._emit_table_tag(markup, tag, style, padding, close_open_markup,
-                             cell, "")
+        self._emit_table_tag(markup, tag, style, padding, close_open_markup, cell, "")
         # Keep header/cell line contexts:
-        self._context |= cell_context & (contexts.TABLE_TH_LINE |
-                                         contexts.TABLE_TD_LINE)
+        self._context |= cell_context & (
+            contexts.TABLE_TH_LINE | contexts.TABLE_TD_LINE
+        )
         # Offset displacement done by parse():
         self._head -= 1

@@ -1333,7 +1389,11 @@
         elif this == "|" and self._context & contexts.TEMPLATE:
             self._handle_template_param()
         elif this == "=" and self._context & contexts.TEMPLATE_PARAM_KEY:
-            if not self._global & contexts.GL_HEADING and self._read(-1) in ("\n", self.START) and nxt == "=":
+            if (
+                not self._global & contexts.GL_HEADING
+                and self._read(-1) in ("\n", self.START)
+                and nxt == "="
+            ):
                 self._parse_heading()
             else:
                 self._handle_template_param_value()

@@ -1362,7 +1422,11 @@
             self._parse_external_link(False)
         elif this == "]" and self._context & contexts.EXT_LINK_TITLE:
             return self._pop()
-        elif this == "=" and not self._global & contexts.GL_HEADING and not self._context & contexts.TEMPLATE:
+        elif (
+            this == "="
+            and not self._global & contexts.GL_HEADING
+            and not self._context & contexts.TEMPLATE
+        ):
             if self._read(-1) in ("\n", self.START):
                 self._parse_heading()
             else:

@@ -1397,7 +1461,8 @@
         elif self._read(-1) in ("\n", self.START) and this in ("#", "*", ";", ":"):
             self._handle_list()
         elif self._read(-1) in ("\n", self.START) and (
-                this == nxt == self._read(2) == self._read(3) == "-"):
+            this == nxt == self._read(2) == self._read(3) == "-"
+        ):
             self._handle_hr()
         elif this in ("\n", ":") and self._context & contexts.DL_TERM:
             self._handle_dl_term()

@@ -1405,9 +1470,17 @@
             # Kill potential table contexts
             self._context &= ~contexts.TABLE_CELL_LINE_CONTEXTS
         # Start of table parsing
-        elif this == "{" and nxt == "|" and (
-                self._read(-1) in ("\n", self.START) or
-                (self._read(-2) in ("\n", self.START) and self._read(-1).isspace())):
+        elif (
+            this == "{"
+            and nxt == "|"
+            and (
+                self._read(-1) in ("\n", self.START)
+                or (
+                    self._read(-2) in ("\n", self.START)
+                    and self._read(-1).isspace()
+                )
+            )
+        ):
             if self._can_recurse():
                 self._parse_table()
             else:

@@ -1431,8 +1504,9 @@
         elif this == "\n" and self._context & contexts.TABLE_CELL_LINE_CONTEXTS:
             self._context &= ~contexts.TABLE_CELL_LINE_CONTEXTS
             self._emit_text(this)
-        elif (self._read(-1) in ("\n", self.START) or
-              (self._read(-2) in ("\n", self.START) and self._read(-1).isspace())):
+        elif self._read(-1) in ("\n", self.START) or (
+            self._read(-2) in ("\n", self.START) and self._read(-1).isspace()
+        ):
             if this == "|" and nxt == "}":
                 if self._context & contexts.TABLE_CELL_OPEN:
                     return self._handle_table_cell_end()
@@ -28,6 +28,7 @@ the :class`.Wikicode` tree by the :class:`.Builder`.

 __all__ = ["Token"]

+
 class Token(dict):
     """A token stores the semantic meaning of a unit of wikicode."""

@@ -61,43 +62,44 @@ def make(name):
     __all__.append(name)
     return type(name, (Token,), {})

+
 Text = make("Text")

-TemplateOpen = make("TemplateOpen")                             # {{
-TemplateParamSeparator = make("TemplateParamSeparator")         # |
-TemplateParamEquals = make("TemplateParamEquals")               # =
-TemplateClose = make("TemplateClose")                           # }}
+TemplateOpen = make("TemplateOpen")  # {{
+TemplateParamSeparator = make("TemplateParamSeparator")  # |
+TemplateParamEquals = make("TemplateParamEquals")  # =
+TemplateClose = make("TemplateClose")  # }}

-ArgumentOpen = make("ArgumentOpen")                             # {{{
-ArgumentSeparator = make("ArgumentSeparator")                   # |
-ArgumentClose = make("ArgumentClose")                           # }}}
+ArgumentOpen = make("ArgumentOpen")  # {{{
+ArgumentSeparator = make("ArgumentSeparator")  # |
+ArgumentClose = make("ArgumentClose")  # }}}

-WikilinkOpen = make("WikilinkOpen")                             # [[
-WikilinkSeparator = make("WikilinkSeparator")                   # |
-WikilinkClose = make("WikilinkClose")                           # ]]
+WikilinkOpen = make("WikilinkOpen")  # [[
+WikilinkSeparator = make("WikilinkSeparator")  # |
+WikilinkClose = make("WikilinkClose")  # ]]

-ExternalLinkOpen = make("ExternalLinkOpen")                     # [
-ExternalLinkSeparator = make("ExternalLinkSeparator")           #
-ExternalLinkClose = make("ExternalLinkClose")                   # ]
+ExternalLinkOpen = make("ExternalLinkOpen")  # [
+ExternalLinkSeparator = make("ExternalLinkSeparator")  #
+ExternalLinkClose = make("ExternalLinkClose")  # ]

-HTMLEntityStart = make("HTMLEntityStart")                       # &
-HTMLEntityNumeric = make("HTMLEntityNumeric")                   # #
-HTMLEntityHex = make("HTMLEntityHex")                           # x
-HTMLEntityEnd = make("HTMLEntityEnd")                           # ;
+HTMLEntityStart = make("HTMLEntityStart")  # &
+HTMLEntityNumeric = make("HTMLEntityNumeric")  # #
+HTMLEntityHex = make("HTMLEntityHex")  # x
+HTMLEntityEnd = make("HTMLEntityEnd")  # ;

-HeadingStart = make("HeadingStart")                             # =...
-HeadingEnd = make("HeadingEnd")                                 # =...
+HeadingStart = make("HeadingStart")  # =...
+HeadingEnd = make("HeadingEnd")  # =...

-CommentStart = make("CommentStart")                             # <!--
-CommentEnd = make("CommentEnd")                                 # -->
+CommentStart = make("CommentStart")  # <!--
+CommentEnd = make("CommentEnd")  # -->

-TagOpenOpen = make("TagOpenOpen")                               # <
+TagOpenOpen = make("TagOpenOpen")  # <
 TagAttrStart = make("TagAttrStart")
-TagAttrEquals = make("TagAttrEquals")                           # =
-TagAttrQuote = make("TagAttrQuote")                             # ", '
-TagCloseOpen = make("TagCloseOpen")                             # >
-TagCloseSelfclose = make("TagCloseSelfclose")                   # />
-TagOpenClose = make("TagOpenClose")                             # </
-TagCloseClose = make("TagCloseClose")                           # >
+TagAttrEquals = make("TagAttrEquals")  # =
+TagAttrQuote = make("TagAttrQuote")  # ", '
+TagCloseOpen = make("TagCloseOpen")  # >
+TagCloseSelfclose = make("TagCloseSelfclose")  # />
+TagOpenClose = make("TagOpenClose")  # </
+TagCloseClose = make("TagCloseClose")  # >

 del make
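Since make() just builds empty dict subclasses, tokens are constructed with keyword arguments and expose those entries as attributes, exactly as the tokenizer diffs above do with tokens.TagAttrStart(pad_first=...). A quick sketch (the attribute-access behavior is assumed from Token's dict-backed design):

    from mwparserfromhell.parser import tokens

    tok = tokens.Text(text="spam")
    assert isinstance(tok, tokens.Token) and isinstance(tok, dict)
    assert tok.text == "spam"  # attributes read the underlying dict entries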
@@ -167,7 +167,7 @@ class ListProxy(_SliceNormalizerMixIn, list):

     def _render(self):
         """Return the actual list from the stored start/stop/step."""
-        return list(self._parent)[self._start:self._stop:self._step]
+        return list(self._parent)[self._start : self._stop : self._step]

     @inheritdoc
     def append(self, item):

@@ -187,7 +187,7 @@

     @inheritdoc
     def extend(self, item):
-        self._parent[self._stop:self._stop] = item
+        self._parent[self._stop : self._stop] = item

     @inheritdoc
     def insert(self, index, item):

@@ -215,7 +215,7 @@

     def reverse(self):
         item = self._render()
         item.reverse()
-        self._parent[self._start:self._stop:self._step] = item
+        self._parent[self._start : self._stop : self._step] = item

     @inheritdoc
     def sort(self, key=None, reverse=None):

@@ -226,4 +226,4 @@
         if reverse is not None:
             kwargs["reverse"] = reverse
         item.sort(**kwargs)
-        self._parent[self._start:self._stop:self._step] = item
+        self._parent[self._start : self._stop : self._step] = item
@@ -27,6 +27,7 @@ from sys import getdefaultencoding

 __all__ = ["StringMixIn"]

+
 def inheritdoc(method):
     """Set __doc__ of *method* to __doc__ of *method* in its parent class.

@@ -36,6 +37,7 @@ def inheritdoc(method):
     method.__doc__ = getattr(str, method.__name__).__doc__
     return method

+
 class StringMixIn:
     """Implement the interface for ``str`` in a dynamic manner.

@@ -92,8 +94,9 @@ class StringMixIn:

     def __getattr__(self, attr):
         if not hasattr(str, attr):
-            raise AttributeError("{!r} object has no attribute {!r}".format(
-                type(self).__name__, attr))
+            raise AttributeError(
+                "{!r} object has no attribute {!r}".format(type(self).__name__, attr)
+            )
         return getattr(self.__str__(), attr)

     maketrans = str.maketrans  # Static method can't rely on __getattr__

@@ -25,6 +25,7 @@ users generally won't need stuff from here.

 __all__ = ["parse_anything"]

+
 def parse_anything(value, context=0, skip_style_tags=False):
     """Return a :class:`.Wikicode` for *value*, allowing multiple types.

@@ -64,6 +65,8 @@ def parse_anything(value, context=0, skip_style_tags=False):
             nodelist += parse_anything(item, context, skip_style_tags).nodes
         return Wikicode(nodelist)
     except TypeError as exc:
-        error = ("Needs string, Node, Wikicode, file, int, None, or "
-                 "iterable of these, but got {0}: {1}")
+        error = (
+            "Needs string, Node, Wikicode, file, int, None, or "
+            "iterable of these, but got {0}: {1}"
+        )
         raise ValueError(error.format(type(value).__name__, value)) from exc
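The reflowed error string enumerates exactly what parse_anything() accepts; anything else ends in the ValueError above. For example:

    from mwparserfromhell.utils import parse_anything

    parse_anything("{{foo}} bar")  # str -> parsed Wikicode
    parse_anything(42)             # int -> Wikicode of str(42)
    parse_anything(None)           # None -> empty Wikicode
    parse_anything(["a", "b"])     # iterables are parsed and concatenated
    parse_anything(object())       # raises ValueError with the message above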
@@ -21,8 +21,18 @@

 import re
 from itertools import chain

-from .nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity,
-                    Node, Tag, Template, Text, Wikilink)
+from .nodes import (
+    Argument,
+    Comment,
+    ExternalLink,
+    Heading,
+    HTMLEntity,
+    Node,
+    Tag,
+    Template,
+    Text,
+    Wikilink,
+)
 from .smart_list.list_proxy import ListProxy
 from .string_mixin import StringMixIn
 from .utils import parse_anything

@@ -31,6 +41,7 @@ __all__ = ["Wikicode"]

 FLAGS = re.IGNORECASE | re.DOTALL | re.UNICODE

+
 class Wikicode(StringMixIn):
     """A ``Wikicode`` is a container for nodes that operates like a string.

@@ -41,6 +52,7 @@ class Wikicode(StringMixIn):
     <ifilter>` series of functions is very useful for extracting and iterating
     over, for example, all of the templates in the object.
     """
+
     RECURSE_OTHERS = 2

     def __init__(self, nodes):

@@ -82,8 +94,9 @@ class Wikicode(StringMixIn):
             return lambda obj: re.search(matches, str(obj), flags)
         return lambda obj: True

-    def _indexed_ifilter(self, recursive=True, matches=None, flags=FLAGS,
-                         forcetype=None):
+    def _indexed_ifilter(
+        self, recursive=True, matches=None, flags=FLAGS, forcetype=None
+    ):
         """Iterate over nodes and their corresponding indices in the node list.

         The arguments are interpreted as for :meth:`ifilter`. For each tuple

@@ -94,9 +107,11 @@ class Wikicode(StringMixIn):
         match = self._build_matcher(matches, flags)
         if recursive:
             restrict = forcetype if recursive == self.RECURSE_OTHERS else None
+
             def getter(i, node):
                 for ch in self._get_children(node, restrict=restrict):
                     yield (i, ch)
+
             inodes = chain(*(getter(i, n) for i, n in enumerate(self.nodes)))
         else:
             inodes = enumerate(self.nodes)

@@ -106,6 +121,7 @@ class Wikicode(StringMixIn):
     def _is_child_wikicode(self, obj, recursive=True):
         """Return whether the given :class:`.Wikicode` is a descendant."""
+
         def deref(nodes):
             if isinstance(nodes, ListProxy):
                 return nodes._parent  # pylint: disable=protected-access

@@ -210,6 +226,7 @@ class Wikicode(StringMixIn):
         should be any object that can be tested for with ``is``. *indent* is
         the starting indentation.
         """
+
         def write(*args):
             """Write a new line following the proper indentation rules."""
             if lines and lines[-1] is marker:  # Continue from the last line

@@ -243,10 +260,12 @@ class Wikicode(StringMixIn):
         This is equivalent to :meth:`{1}` with *forcetype* set to
         :class:`~{2.__module__}.{2.__name__}`.
         """
-        make_ifilter = lambda ftype: (lambda self, *a, **kw:
-                                      self.ifilter(forcetype=ftype, *a, **kw))
-        make_filter = lambda ftype: (lambda self, *a, **kw:
-                                     self.filter(forcetype=ftype, *a, **kw))
+        make_ifilter = lambda ftype: (
+            lambda self, *a, **kw: self.ifilter(forcetype=ftype, *a, **kw)
+        )
+        make_filter = lambda ftype: (
+            lambda self, *a, **kw: self.filter(forcetype=ftype, *a, **kw)
+        )
         for name, ftype in meths.items():
             ifilt = make_ifilter(ftype)
             filt = make_filter(ftype)

@@ -342,6 +361,7 @@ class Wikicode(StringMixIn):
         Will return an empty list if *obj* is at the top level of this Wikicode
         object. Will raise :exc:`ValueError` if it wasn't found.
         """
+
         def _get_ancestors(code, needle):
             for node in code.nodes:
                 if node is needle:

@@ -510,8 +530,7 @@ class Wikicode(StringMixIn):
                 return True
         return False

-    def ifilter(self, recursive=True, matches=None, flags=FLAGS,
-                forcetype=None):
+    def ifilter(self, recursive=True, matches=None, flags=FLAGS, forcetype=None):
         """Iterate over nodes in our list matching certain conditions.

         If *forcetype* is given, only nodes that are instances of this type (or

@@ -545,8 +564,15 @@ class Wikicode(StringMixIn):
         """
         return list(self.ifilter(*args, **kwargs))

-    def get_sections(self, levels=None, matches=None, flags=FLAGS, flat=False,
-                     include_lead=None, include_headings=True):
+    def get_sections(
+        self,
+        levels=None,
+        matches=None,
+        flags=FLAGS,
+        flat=False,
+        include_lead=None,
+        include_headings=True,
+    ):
         """Return a list of sections within the page.

         Sections are returned as :class:`.Wikicode` objects with a shared node

@@ -568,12 +594,14 @@ class Wikicode(StringMixIn):
         :class:`.Heading` object will be included; otherwise, this is skipped.
         """
         title_matcher = self._build_matcher(matches, flags)
-        matcher = lambda heading: (title_matcher(heading.title) and
-                                   (not levels or heading.level in levels))
+        matcher = lambda heading: (
+            title_matcher(heading.title) and (not levels or heading.level in levels)
+        )
         iheadings = self._indexed_ifilter(recursive=False, forcetype=Heading)
         sections = []  # Tuples of (index_of_first_node, section)
-        open_headings = []  # Tuples of (index, heading), where index and
-                            # heading.level are both monotonically increasing
+        # Tuples of (index, heading), where index and heading.level are both
+        # monotonically increasing
+        open_headings = []

         # Add the lead section if appropriate:
         if include_lead or not (include_lead is not None or matches or levels):

@@ -610,8 +638,7 @@ class Wikicode(StringMixIn):
         # Ensure that earlier sections are earlier in the returned list:
         return [section for i, section in sorted(sections)]

-    def strip_code(self, normalize=True, collapse=True,
-                   keep_template_params=False):
+    def strip_code(self, normalize=True, collapse=True, keep_template_params=False):
         """Return a rendered string without unprintable code such as templates.

         The way a node is stripped is handled by the

@@ -631,7 +658,7 @@ class Wikicode(StringMixIn):
         kwargs = {
             "normalize": normalize,
             "collapse": collapse,
-            "keep_template_params": keep_template_params
+            "keep_template_params": keep_template_params,
         }
         nodes = []

@@ -673,7 +700,15 @@ class Wikicode(StringMixIn):
         marker = object()  # Random object we can find with certainty in a list
         return "\n".join(self._get_tree(self, [], marker, 0))

+
 Wikicode._build_filter_methods(
-    arguments=Argument, comments=Comment, external_links=ExternalLink,
-    headings=Heading, html_entities=HTMLEntity, tags=Tag, templates=Template,
-    text=Text, wikilinks=Wikilink)
+    arguments=Argument,
+    comments=Comment,
+    external_links=ExternalLink,
+    headings=Heading,
+    html_entities=HTMLEntity,
+    tags=Tag,
+    templates=Template,
+    text=Text,
+    wikilinks=Wikilink,
+)
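For orientation, the _build_filter_methods() call above is what generates the public filter_templates(), ifilter_wikilinks(), and friends by currying forcetype; the reformatted get_sections() and strip_code() keep their signatures. A usage sketch (output shapes assumed, not verified):

    import mwparserfromhell

    code = mwparserfromhell.parse("{{foo|key=value}} [[bar]]\n== Section ==\ntext")
    code.filter_templates()        # generated: self.filter(forcetype=Template)
    code.ifilter_wikilinks()       # generated: self.ifilter(forcetype=Wikilink)
    code.get_sections(levels=[2])  # the "== Section ==" block as Wikicode
    code.strip_code()              # rendered text without templates/markup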
@@ -18,14 +18,24 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
from mwparserfromhell.nodes import (Argument, Comment, ExternalLink, Heading,
                                    HTMLEntity, Tag, Template, Text, Wikilink)
from mwparserfromhell.nodes import (
    Argument,
    Comment,
    ExternalLink,
    Heading,
    HTMLEntity,
    Tag,
    Template,
    Text,
    Wikilink,
)
from mwparserfromhell.smart_list import SmartList
from mwparserfromhell.wikicode import Wikicode

wrap = lambda L: Wikicode(SmartList(L))
wraptext = lambda *args: wrap([Text(t) for t in args])


def _assert_node_equal(expected, actual):
    """Assert that two Nodes have the same type and have the same data."""
    registry = {
@@ -43,6 +53,7 @@ def _assert_node_equal(expected, actual):
    assert type(expected) == type(actual)
    registry[type(expected)](expected, actual)


def _assert_argument_node_equal(expected, actual):
    """Assert that two Argument nodes have the same data."""
    assert_wikicode_equal(expected.name, actual.name)
@@ -51,10 +62,12 @@ def _assert_argument_node_equal(expected, actual):
    else:
        assert actual.default is None


def _assert_comment_node_equal(expected, actual):
    """Assert that two Comment nodes have the same data."""
    assert expected.contents == actual.contents


def _assert_external_link_node_equal(expected, actual):
    """Assert that two ExternalLink nodes have the same data."""
    assert_wikicode_equal(expected.url, actual.url)
@@ -65,11 +78,13 @@ def _assert_external_link_node_equal(expected, actual):
    assert expected.brackets is actual.brackets
    assert expected.suppress_space is actual.suppress_space


def _assert_heading_node_equal(expected, actual):
    """Assert that two Heading nodes have the same data."""
    assert_wikicode_equal(expected.title, actual.title)
    assert expected.level == actual.level


def _assert_html_entity_node_equal(expected, actual):
    """Assert that two HTMLEntity nodes have the same data."""
    assert expected.value == actual.value
@@ -77,6 +92,7 @@ def _assert_html_entity_node_equal(expected, actual):
    assert expected.hexadecimal is actual.hexadecimal
    assert expected.hex_char == actual.hex_char


def _assert_tag_node_equal(expected, actual):
    """Assert that two Tag nodes have the same data."""
    assert_wikicode_equal(expected.tag, actual.tag)
@@ -105,6 +121,7 @@ def _assert_tag_node_equal(expected, actual):
    assert expected.padding == actual.padding
    assert_wikicode_equal(expected.closing_tag, actual.closing_tag)


def _assert_template_node_equal(expected, actual):
    """Assert that two Template nodes have the same data."""
    assert_wikicode_equal(expected.name, actual.name)
@@ -117,10 +134,12 @@ def _assert_template_node_equal(expected, actual):
    assert_wikicode_equal(exp_param.value, act_param.value)
    assert exp_param.showkey is act_param.showkey


def _assert_text_node_equal(expected, actual):
    """Assert that two Text nodes have the same data."""
    assert expected.value == actual.value


def _assert_wikilink_node_equal(expected, actual):
    """Assert that two Wikilink nodes have the same data."""
    assert_wikicode_equal(expected.title, actual.title)
@@ -129,6 +148,7 @@ def _assert_wikilink_node_equal(expected, actual):
    else:
        assert actual.text is None


def assert_wikicode_equal(expected, actual):
    """Assert that two Wikicode objects have the same data."""
    assert isinstance(actual, Wikicode)
@@ -27,6 +27,7 @@ import pytest
from mwparserfromhell.nodes import Argument, Text
from .conftest import assert_wikicode_equal, wrap, wraptext


def test_str():
    """test Argument.__str__()"""
    node = Argument(wraptext("foobar"))
@@ -34,6 +35,7 @@ def test_str():
    node2 = Argument(wraptext("foo"), wraptext("bar"))
    assert "{{{foo|bar}}}" == str(node2)


def test_children():
    """test Argument.__children__()"""
    node1 = Argument(wraptext("foobar"))
@@ -48,6 +50,7 @@ def test_children():
    with pytest.raises(StopIteration):
        next(gen2)


def test_strip():
    """test Argument.__strip__()"""
    node1 = Argument(wraptext("foobar"))
@@ -55,6 +58,7 @@ def test_strip():
    assert node1.__strip__() is None
    assert "bar" == node2.__strip__()


def test_showtree():
    """test Argument.__showtree__()"""
    output = []
@@ -66,10 +70,19 @@ def test_showtree():
    node1.__showtree__(output.append, get, mark)
    node2.__showtree__(output.append, get, mark)
    valid = [
        "{{{", (getter, node1.name), "}}}", "{{{", (getter, node2.name),
        " | ", marker, (getter, node2.default), "}}}"]
        "{{{",
        (getter, node1.name),
        "}}}",
        "{{{",
        (getter, node2.name),
        " | ",
        marker,
        (getter, node2.default),
        "}}}",
    ]
    assert valid == output


def test_name():
    """test getter/setter for the name attribute"""
    name = wraptext("foobar")
@@ -82,6 +95,7 @@ def test_name():
    assert_wikicode_equal(wraptext("héhehé"), node1.name)
    assert_wikicode_equal(wraptext("héhehé"), node2.name)


def test_default():
    """test getter/setter for the default attribute"""
    default = wraptext("baz")
@@ -28,6 +28,7 @@ from mwparserfromhell.nodes import Template
from mwparserfromhell.nodes.extras import Attribute
from .conftest import assert_wikicode_equal, wrap, wraptext


def test_str():
    """test Attribute.__str__()"""
    node = Attribute(wraptext("foo"))
@@ -43,6 +44,7 @@ def test_str():
    node6 = Attribute(wraptext("a"), wrap([]), None, " ", "", " ")
    assert " a= " == str(node6)


def test_name():
    """test getter/setter for the name attribute"""
    name = wraptext("id")
@@ -51,6 +53,7 @@ def test_name():
    node.name = "{{id}}"
    assert_wikicode_equal(wrap([Template(wraptext("id"))]), node.name)


def test_value():
    """test getter/setter for the value attribute"""
    value = wraptext("foo")
@@ -74,6 +77,7 @@ def test_value():
    assert_wikicode_equal(wraptext("fo\"o 'bar' b\"az"), node2.value)
    assert '"' == node2.quotes


def test_quotes():
    """test getter/setter for the quotes attribute"""
    node1 = Attribute(wraptext("id"), wraptext("foo"), None)
@@ -92,6 +96,7 @@ def test_quotes():
    with pytest.raises(ValueError):
        Attribute(wraptext("id"), wraptext("foo bar baz"), None)


def test_padding():
    """test getter/setter for the padding attributes"""
    for pad in ["pad_first", "pad_before_eq", "pad_after_eq"]:
@@ -26,11 +26,13 @@ import pytest
from mwparserfromhell.nodes import Comment


def test_str():
    """test Comment.__str__()"""
    node = Comment("foobar")
    assert "<!--foobar-->" == str(node)


def test_children():
    """test Comment.__children__()"""
    node = Comment("foobar")
@@ -38,11 +40,13 @@ def test_children():
    with pytest.raises(StopIteration):
        next(gen)


def test_strip():
    """test Comment.__strip__()"""
    node = Comment("foobar")
    assert node.__strip__() is None


def test_showtree():
    """test Comment.__showtree__()"""
    output = []
@@ -50,6 +54,7 @@ def test_showtree():
    node.__showtree__(output.append, None, None)
    assert ["<!--foobar-->"] == output


def test_contents():
    """test getter/setter for the contents attribute"""
    node = Comment("foobar")
@@ -32,6 +32,7 @@ import pytest
import mwparserfromhell


def assert_print(value, output):
    """Assertion check that *value*, when printed, produces *output*."""
    buff = StringIO()
@@ -39,6 +40,7 @@ def assert_print(value, output):
    buff.seek(0)
    assert output == buff.read()


def test_readme_1():
    """test a block of example code in the README"""
    text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?"
@@ -52,6 +54,7 @@ def test_readme_1():
    assert_print(template.get(1).value, "bar")
    assert_print(template.get("eggs").value, "spam")


def test_readme_2():
    """test a block of example code in the README"""
    text = "{{foo|{{bar}}={{baz|{{spam}}}}}}"
@@ -59,17 +62,19 @@ def test_readme_2():
    res = "['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}']"
    assert_print(temps, res)


def test_readme_3():
    """test a block of example code in the README"""
    code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}")
    assert_print(code.filter_templates(recursive=False),
                 "['{{foo|this {{includes a|template}}}}']")
    assert_print(
        code.filter_templates(recursive=False),
        "['{{foo|this {{includes a|template}}}}']",
    )
    foo = code.filter_templates(recursive=False)[0]
    assert_print(foo.get(1).value, "this {{includes a|template}}")
    assert_print(foo.get(1).value.filter_templates()[0],
                 "{{includes a|template}}")
    assert_print(foo.get(1).value.filter_templates()[0].get(1).value,
                 "template")
    assert_print(foo.get(1).value.filter_templates()[0], "{{includes a|template}}")
    assert_print(foo.get(1).value.filter_templates()[0].get(1).value, "template")


def test_readme_4():
    """test a block of example code in the README"""
@@ -90,6 +95,7 @@ def test_readme_4():
    assert_print(text, res)
    assert text == code


@pytest.mark.skipif("NOWEB" in os.environ, reason="web test disabled by environ var")
def test_readme_5():
    """test a block of example code in the README; includes a web call"""
@@ -27,6 +27,7 @@ import pytest
from mwparserfromhell.nodes import ExternalLink, Text
from .conftest import assert_wikicode_equal, wrap, wraptext


def test_str():
    """test ExternalLink.__str__()"""
    node = ExternalLink(wraptext("http://example.com/"), brackets=False)
@@ -35,15 +36,16 @@ def test_str():
    assert "[http://example.com/]" == str(node2)
    node3 = ExternalLink(wraptext("http://example.com/"), wrap([]))
    assert "[http://example.com/ ]" == str(node3)
    node4 = ExternalLink(wraptext("http://example.com/"),
                         wraptext("Example Web Page"))
    node4 = ExternalLink(wraptext("http://example.com/"), wraptext("Example Web Page"))
    assert "[http://example.com/ Example Web Page]" == str(node4)


def test_children():
    """test ExternalLink.__children__()"""
    node1 = ExternalLink(wraptext("http://example.com/"), brackets=False)
    node2 = ExternalLink(wraptext("http://example.com/"),
                         wrap([Text("Example"), Text("Page")]))
    node2 = ExternalLink(
        wraptext("http://example.com/"), wrap([Text("Example"), Text("Page")])
    )
    gen1 = node1.__children__()
    gen2 = node2.__children__()
    assert node1.url == next(gen1)
@@ -54,6 +56,7 @@ def test_children():
    with pytest.raises(StopIteration):
        next(gen2)


def test_strip():
    """test ExternalLink.__strip__()"""
    node1 = ExternalLink(wraptext("http://example.com"), brackets=False)
@@ -66,6 +69,7 @@ def test_strip():
    assert node3.__strip__() is None
    assert "Link" == node4.__strip__()


def test_showtree():
    """test ExternalLink.__showtree__()"""
    output = []
@@ -76,11 +80,10 @@ def test_showtree():
    node2 = ExternalLink(wraptext("http://example.com"), wraptext("Link"))
    node1.__showtree__(output.append, get, mark)
    node2.__showtree__(output.append, get, mark)
    valid = [
        (getter, node1.url), "[", (getter, node2.url),
        (getter, node2.title), "]"]
    valid = [(getter, node1.url), "[", (getter, node2.url), (getter, node2.title), "]"]
    assert valid == output


def test_url():
    """test getter/setter for the url attribute"""
    url = wraptext("http://example.com/")
@@ -93,6 +96,7 @@ def test_url():
    assert_wikicode_equal(wraptext("mailto:héhehé@spam.com"), node1.url)
    assert_wikicode_equal(wraptext("mailto:héhehé@spam.com"), node2.url)


def test_title():
    """test getter/setter for the title attribute"""
    title = wraptext("Example!")
@@ -105,6 +109,7 @@ def test_title():
    node2.title = "My Website"
    assert_wikicode_equal(wraptext("My Website"), node2.title)


def test_brackets():
    """test getter/setter for the brackets attribute"""
    node1 = ExternalLink(wraptext("http://example.com/"), brackets=False)
@@ -27,6 +27,7 @@ import pytest
from mwparserfromhell.nodes import Heading, Text
from .conftest import assert_wikicode_equal, wrap, wraptext


def test_str():
    """test Heading.__str__()"""
    node = Heading(wraptext("foobar"), 2)
@@ -34,6 +35,7 @@ def test_str():
    node2 = Heading(wraptext(" zzz "), 5)
    assert "===== zzz =====" == str(node2)


def test_children():
    """test Heading.__children__()"""
    node = Heading(wrap([Text("foo"), Text("bar")]), 3)
@@ -42,11 +44,13 @@ def test_children():
    with pytest.raises(StopIteration):
        next(gen)


def test_strip():
    """test Heading.__strip__()"""
    node = Heading(wraptext("foobar"), 3)
    assert "foobar" == node.__strip__()


def test_showtree():
    """test Heading.__showtree__()"""
    output = []
@@ -56,10 +60,10 @@ def test_showtree():
    node2 = Heading(wraptext(" baz "), 4)
    node1.__showtree__(output.append, get, None)
    node2.__showtree__(output.append, get, None)
    valid = ["===", (getter, node1.title), "===",
             "====", (getter, node2.title), "===="]
    valid = ["===", (getter, node1.title), "===", "====", (getter, node2.title), "===="]
    assert valid == output


def test_title():
    """test getter/setter for the title attribute"""
    title = wraptext("foobar")
@@ -68,6 +72,7 @@ def test_title():
    node.title = "héhehé"
    assert_wikicode_equal(wraptext("héhehé"), node.title)


def test_level():
    """test getter/setter for the level attribute"""
    node = Heading(wraptext("foobar"), 3)
@@ -26,6 +26,7 @@ import pytest
from mwparserfromhell.nodes import HTMLEntity


def test_str():
    """test HTMLEntity.__str__()"""
    node1 = HTMLEntity("nbsp", named=True, hexadecimal=False)
@@ -37,6 +38,7 @@ def test_str():
    assert "&#x6b;" == str(node3)
    assert "&#X6C;" == str(node4)


def test_children():
    """test HTMLEntity.__children__()"""
    node = HTMLEntity("nbsp", named=True, hexadecimal=False)
@@ -44,6 +46,7 @@ def test_children():
    with pytest.raises(StopIteration):
        next(gen)


def test_strip():
    """test HTMLEntity.__strip__()"""
    node1 = HTMLEntity("nbsp", named=True, hexadecimal=False)
@@ -57,6 +60,7 @@ def test_strip():
    assert "é" == node3.__strip__(normalize=True)
    assert "&#xe9;" == node3.__strip__(normalize=False)


def test_showtree():
    """test HTMLEntity.__showtree__()"""
    output = []
@@ -69,6 +73,7 @@ def test_showtree():
    res = ["&nbsp;", "&#107;", "&#xe9;"]
    assert res == output


def test_value():
    """test getter/setter for the value attribute"""
    node1 = HTMLEntity("nbsp")
@@ -109,6 +114,7 @@ def test_value():
    with pytest.raises(ValueError):
        node1.__setattr__("value", "12FFFF")


def test_named():
    """test getter/setter for the named attribute"""
    node1 = HTMLEntity("nbsp")
@@ -130,6 +136,7 @@ def test_named():
    with pytest.raises(ValueError):
        node3.__setattr__("named", True)


def test_hexadecimal():
    """test getter/setter for the hexadecimal attribute"""
    node1 = HTMLEntity("nbsp")
@@ -147,6 +154,7 @@ def test_hexadecimal():
    with pytest.raises(ValueError):
        node1.__setattr__("hexadecimal", True)


def test_hex_char():
    """test getter/setter for the hex_char attribute"""
    node1 = HTMLEntity("e9")
@@ -164,6 +172,7 @@ def test_hex_char():
    with pytest.raises(ValueError):
        node1.__setattr__("hex_char", True)


def test_normalize():
    """test getter/setter for the normalize attribute"""
    node1 = HTMLEntity("nbsp")
@@ -27,6 +27,7 @@ import pytest
from mwparserfromhell.nodes.extras import Parameter
from .conftest import assert_wikicode_equal, wraptext


def test_str():
    """test Parameter.__str__()"""
    node = Parameter(wraptext("1"), wraptext("foo"), showkey=False)
@@ -34,6 +35,7 @@ def test_str():
    node2 = Parameter(wraptext("foo"), wraptext("bar"))
    assert "foo=bar" == str(node2)


def test_name():
    """test getter/setter for the name attribute"""
    name1 = wraptext("1")
@@ -47,6 +49,7 @@ def test_name():
    assert_wikicode_equal(wraptext("héhehé"), node1.name)
    assert_wikicode_equal(wraptext("héhehé"), node2.name)


def test_value():
    """test getter/setter for the value attribute"""
    value = wraptext("bar")
@@ -55,6 +58,7 @@ def test_value():
    node.value = "héhehé"
    assert_wikicode_equal(wraptext("héhehé"), node.value)


def test_showkey():
    """test getter/setter for the showkey attribute"""
    node1 = Parameter(wraptext("1"), wraptext("foo"), showkey=False)
@@ -29,6 +29,7 @@ from mwparserfromhell.nodes import Tag, Template, Text, Wikilink
from mwparserfromhell.nodes.extras import Parameter
from .conftest import assert_wikicode_equal, wrap, wraptext


@pytest.fixture()
def pyparser():
    """make sure the correct tokenizer is used"""
@@ -38,37 +39,60 @@ def pyparser():
    yield
    parser.use_c = restore


def test_use_c(pyparser):
    assert parser.Parser()._tokenizer.USES_C is False


def test_parsing(pyparser):
    """integration test for parsing overall"""
    text = "this is text; {{this|is=a|template={{with|[[links]]|in}}it}}"
    expected = wrap([
        Text("this is text; "),
        Template(wraptext("this"), [
            Parameter(wraptext("is"), wraptext("a")),
            Parameter(wraptext("template"), wrap([
                Template(wraptext("with"), [
                    Parameter(wraptext("1"),
                              wrap([Wikilink(wraptext("links"))]),
                              showkey=False),
                    Parameter(wraptext("2"),
                              wraptext("in"), showkey=False)
                ]),
                Text("it")
            ]))
        ])
    ])
    expected = wrap(
        [
            Text("this is text; "),
            Template(
                wraptext("this"),
                [
                    Parameter(wraptext("is"), wraptext("a")),
                    Parameter(
                        wraptext("template"),
                        wrap(
                            [
                                Template(
                                    wraptext("with"),
                                    [
                                        Parameter(
                                            wraptext("1"),
                                            wrap([Wikilink(wraptext("links"))]),
                                            showkey=False,
                                        ),
                                        Parameter(
                                            wraptext("2"), wraptext("in"), showkey=False
                                        ),
                                    ],
                                ),
                                Text("it"),
                            ]
                        ),
                    ),
                ],
            ),
        ]
    )
    actual = parser.Parser().parse(text)
    assert_wikicode_equal(expected, actual)


def test_skip_style_tags(pyparser):
    """test Parser.parse(skip_style_tags=True)"""
    text = "This is an example with ''italics''!"
    a = wrap([Text("This is an example with "),
              Tag(wraptext("i"), wraptext("italics"), wiki_markup="''"),
              Text("!")])
    a = wrap(
        [
            Text("This is an example with "),
            Tag(wraptext("i"), wraptext("italics"), wiki_markup="''"),
            Text("!"),
        ]
    )
    b = wraptext("This is an example with ''italics''!")

    with_style = parser.Parser().parse(text, skip_style_tags=False)
@@ -27,6 +27,7 @@ import pytest
from mwparserfromhell.smart_list import SmartList
from mwparserfromhell.smart_list.list_proxy import ListProxy


def _test_get_set_del_item(builder):
    """Run tests on __get/set/delitem__ of a list built with *builder*."""
    list1 = builder([0, 1, 2, 3, "one", "two"])
@@ -104,6 +105,7 @@ def _test_get_set_del_item(builder):
    del list2[2:8:2]
    assert [0, 1, 3, 5, 7, 8, 9] == list2


def _test_add_radd_iadd(builder):
    """Run tests on __r/i/add__ of a list built with *builder*."""
    list1 = builder(range(5))
@@ -116,6 +118,7 @@ def _test_add_radd_iadd(builder):
    list1 += ["foo", "bar", "baz"]
    assert [0, 1, 2, 3, 4, "foo", "bar", "baz"] == list1


def _test_other_magic_methods(builder):
    """Run tests on other magic methods of a list built with *builder*."""
    list1 = builder([0, 1, 2, 3, "one", "two"])
@@ -200,6 +203,7 @@ def _test_other_magic_methods(builder):
    list4 *= 2
    assert [0, 1, 2, 0, 1, 2] == list4


def _test_list_methods(builder):
    """Run tests on the public methods of a list built with *builder*."""
    list1 = builder(range(5))
@@ -263,6 +267,7 @@ def _test_list_methods(builder):
    list3.sort(key=lambda i: i[1], reverse=True)
    assert [("b", 8), ("a", 5), ("c", 3), ("d", 2)] == list3


def _dispatch_test_for_children(meth):
    """Run a test method on various different types of children."""
    meth(lambda L: SmartList(list(L))[:])
@@ -270,10 +275,20 @@ def _dispatch_test_for_children(meth):
    meth(lambda L: SmartList(list(L) + [999])[:-1])
    meth(lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2])


def test_docs():
    """make sure the methods of SmartList/ListProxy have docstrings"""
    methods = ["append", "count", "extend", "index", "insert", "pop",
               "remove", "reverse", "sort"]
    methods = [
        "append",
        "count",
        "extend",
        "index",
        "insert",
        "pop",
        "remove",
        "reverse",
        "sort",
    ]
    for meth in methods:
        expected = getattr(list, meth).__doc__
        smartlist_doc = getattr(SmartList, meth).__doc__
@@ -281,6 +296,7 @@ def test_docs():
        assert expected == smartlist_doc
        assert expected == listproxy_doc


def test_doctest():
    """make sure the test embedded in SmartList's docstring passes"""
    parent = SmartList([0, 1, 2, 3])
@@ -291,38 +307,47 @@ def test_doctest():
    assert [2, 3, 4] == child
    assert [0, 1, 2, 3, 4] == parent


def test_parent_get_set_del():
    """make sure SmartList's getitem/setitem/delitem work"""
    _test_get_set_del_item(SmartList)


def test_parent_add():
    """make sure SmartList's add/radd/iadd work"""
    _test_add_radd_iadd(SmartList)


def test_parent_other_magics():
    """make sure SmartList's other magically implemented features work"""
    _test_other_magic_methods(SmartList)


def test_parent_methods():
    """make sure SmartList's non-magic methods work, like append()"""
    _test_list_methods(SmartList)


def test_child_get_set_del():
    """make sure ListProxy's getitem/setitem/delitem work"""
    _dispatch_test_for_children(_test_get_set_del_item)


def test_child_add():
    """make sure ListProxy's add/radd/iadd work"""
    _dispatch_test_for_children(_test_add_radd_iadd)


def test_child_other_magics():
    """make sure ListProxy's other magically implemented features work"""
    _dispatch_test_for_children(_test_other_magic_methods)


def test_child_methods():
    """make sure ListProxy's non-magic methods work, like append()"""
    _dispatch_test_for_children(_test_list_methods)


def test_influence():
    """make sure changes are propagated from parents to children"""
    parent = SmartList([0, 1, 2, 3, 4, 5])
@@ -29,6 +29,7 @@ import pytest
from mwparserfromhell.string_mixin import StringMixIn


class _FakeString(StringMixIn):
    def __init__(self, data):
        self._data = data
@@ -36,22 +37,63 @@ class _FakeString(StringMixIn):
    def __str__(self):
        return self._data


@pytest.mark.parametrize('method', [
    "capitalize", "casefold", "center", "count", "encode", "endswith",
    "expandtabs", "find", "format", "format_map", "index", "isalnum",
    "isalpha", "isdecimal", "isdigit", "isidentifier", "islower",
    "isnumeric", "isprintable", "isspace", "istitle", "isupper",
    "join", "ljust", "lower", "lstrip", "maketrans", "partition",
    "replace", "rfind", "rindex", "rjust", "rpartition", "rsplit",
    "rstrip", "split", "splitlines", "startswith", "strip", "swapcase",
    "title", "translate", "upper", "zfill"
])
@pytest.mark.parametrize(
    "method",
    [
        "capitalize",
        "casefold",
        "center",
        "count",
        "encode",
        "endswith",
        "expandtabs",
        "find",
        "format",
        "format_map",
        "index",
        "isalnum",
        "isalpha",
        "isdecimal",
        "isdigit",
        "isidentifier",
        "islower",
        "isnumeric",
        "isprintable",
        "isspace",
        "istitle",
        "isupper",
        "join",
        "ljust",
        "lower",
        "lstrip",
        "maketrans",
        "partition",
        "replace",
        "rfind",
        "rindex",
        "rjust",
        "rpartition",
        "rsplit",
        "rstrip",
        "split",
        "splitlines",
        "startswith",
        "strip",
        "swapcase",
        "title",
        "translate",
        "upper",
        "zfill",
    ],
)
def test_docs(method):
    """make sure the various methods of StringMixIn have docstrings"""
    expected = getattr("foo", method).__doc__
    actual = getattr(_FakeString("foo"), method).__doc__
    assert expected == actual


def test_types():
    """make sure StringMixIns convert to different types correctly"""
    fstr = _FakeString("fake string")
@@ -63,6 +105,7 @@ def test_types():
    assert isinstance(bytes(fstr), bytes)
    assert isinstance(repr(fstr), str)


def test_comparisons():
    """make sure comparison operators work"""
    str1 = _FakeString("this is a fake string")
@@ -99,6 +142,7 @@ def test_comparisons():
    assert str5 < str1
    assert str5 <= str1


def test_other_magics():
    """test other magically implemented features, like len() and iter()"""
    str1 = _FakeString("fake string")
@@ -154,6 +198,7 @@ def test_other_magics():
    assert "real" not in str1
    assert "s" not in str2


def test_other_methods():
    """test the remaining non-magic methods of StringMixIn"""
    str1 = _FakeString("fake string")
@@ -354,8 +399,21 @@ def test_other_methods():
    actual = ["this", "is", "a", "sentence", "with", "whitespace"]
    assert actual == str25.rsplit()
    assert actual == str25.rsplit(None)
    actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with",
              "", "whitespace", ""]
    actual = [
        "",
        "",
        "",
        "this",
        "is",
        "a",
        "",
        "",
        "sentence",
        "with",
        "",
        "whitespace",
        "",
    ]
    assert actual == str25.rsplit(" ")
    actual = [" this is a", "sentence", "with", "whitespace"]
    assert actual == str25.rsplit(None, 3)
@@ -371,8 +429,21 @@ def test_other_methods():
    actual = ["this", "is", "a", "sentence", "with", "whitespace"]
    assert actual == str25.split()
    assert actual == str25.split(None)
    actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with",
              "", "whitespace", ""]
    actual = [
        "",
        "",
        "",
        "this",
        "is",
        "a",
        "",
        "",
        "sentence",
        "with",
        "",
        "whitespace",
        "",
    ]
    assert actual == str25.split(" ")
    actual = ["this", "is", "a", "sentence with whitespace "]
    assert actual == str25.split(None, 3)
@@ -382,10 +453,15 @@ def test_other_methods():
    assert actual == str25.split(maxsplit=3)

    str26 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere")
    assert ["lines", "of", "text", "are", "presented", "here"] \
        == str26.splitlines()
    assert ["lines\n", "of\n", "text\r\n", "are\r\n", "presented\n", "here"] \
        == str26.splitlines(True)
    assert ["lines", "of", "text", "are", "presented", "here"] == str26.splitlines()
    assert [
        "lines\n",
        "of\n",
        "text\r\n",
        "are\r\n",
        "presented\n",
        "here",
    ] == str26.splitlines(True)
    assert str1.startswith("fake") is True
    assert str1.startswith("faker") is False
@@ -398,8 +474,7 @@ def test_other_methods():
    assert "Fake String" == str1.title()

    table1 = StringMixIn.maketrans({97: "1", 101: "2", 105: "3",
                                    111: "4", 117: "5"})
    table1 = StringMixIn.maketrans({97: "1", 101: "2", 105: "3", 111: "4", 117: "5"})
    table2 = StringMixIn.maketrans("aeiou", "12345")
    table3 = StringMixIn.maketrans("aeiou", "12345", "rts")
    assert "f1k2 str3ng" == str1.translate(table1)
@@ -34,21 +34,20 @@ agennq = lambda name, value: Attribute(wraptext(name), wraptext(value), None)
agenp = lambda name, v, a, b, c: Attribute(wraptext(name), v, '"', a, b, c)
agenpnv = lambda name, a, b, c: Attribute(wraptext(name), None, '"', a, b, c)


def test_str():
    """test Tag.__str__()"""
    node1 = Tag(wraptext("ref"))
    node2 = Tag(wraptext("span"), wraptext("foo"),
                [agen("style", "color: red;")])
    node3 = Tag(wraptext("ref"),
                attrs=[agennq("name", "foo"),
                       agenpnv("some_attr", " ", "", "")],
                self_closing=True)
    node2 = Tag(wraptext("span"), wraptext("foo"), [agen("style", "color: red;")])
    node3 = Tag(
        wraptext("ref"),
        attrs=[agennq("name", "foo"), agenpnv("some_attr", " ", "", "")],
        self_closing=True,
    )
    node4 = Tag(wraptext("br"), self_closing=True, padding=" ")
    node5 = Tag(wraptext("br"), self_closing=True, implicit=True)
    node6 = Tag(wraptext("br"), self_closing=True, invalid=True,
                implicit=True)
    node7 = Tag(wraptext("br"), self_closing=True, invalid=True,
                padding=" ")
    node6 = Tag(wraptext("br"), self_closing=True, invalid=True, implicit=True)
    node7 = Tag(wraptext("br"), self_closing=True, invalid=True, padding=" ")
    node8 = Tag(wraptext("hr"), wiki_markup="----", self_closing=True)
    node9 = Tag(wraptext("i"), wraptext("italics!"), wiki_markup="''")
@@ -62,6 +61,7 @@ def test_str():
    assert "----" == str(node8)
    assert "''italics!''" == str(node9)


def test_children():
    """test Tag.__children__()"""
    # <ref>foobar</ref>
@@ -69,10 +69,12 @@ def test_children():
    # '''bold text'''
    node2 = Tag(wraptext("b"), wraptext("bold text"), wiki_markup="'''")
    # <img id="foo" class="bar" selected />
    node3 = Tag(wraptext("img"),
                attrs=[agen("id", "foo"), agen("class", "bar"),
                       agennv("selected")],
                self_closing=True, padding=" ")
    node3 = Tag(
        wraptext("img"),
        attrs=[agen("id", "foo"), agen("class", "bar"), agennv("selected")],
        self_closing=True,
        padding=" ",
    )

    gen1 = node1.__children__()
    gen2 = node2.__children__()
@@ -94,6 +96,7 @@ def test_children():
    with pytest.raises(StopIteration):
        next(gen3)


def test_strip():
    """test Tag.__strip__()"""
    node1 = Tag(wraptext("i"), wraptext("foobar"))
@@ -104,28 +107,46 @@ def test_strip():
    assert node2.__strip__() is None
    assert node3.__strip__() is None


def test_showtree():
    """test Tag.__showtree__()"""
    output = []
    getter, marker = object(), object()
    get = lambda code: output.append((getter, code))
    mark = lambda: output.append(marker)
    node1 = Tag(wraptext("ref"), wraptext("text"),
                [agen("name", "foo"), agennv("selected")])
    node1 = Tag(
        wraptext("ref"), wraptext("text"), [agen("name", "foo"), agennv("selected")]
    )
    node2 = Tag(wraptext("br"), self_closing=True, padding=" ")
    node3 = Tag(wraptext("br"), self_closing=True, invalid=True,
                implicit=True, padding=" ")
    node3 = Tag(
        wraptext("br"), self_closing=True, invalid=True, implicit=True, padding=" "
    )
    node1.__showtree__(output.append, get, mark)
    node2.__showtree__(output.append, get, mark)
    node3.__showtree__(output.append, get, mark)
    valid = [
        "<", (getter, node1.tag), (getter, node1.attributes[0].name),
        " = ", marker, (getter, node1.attributes[0].value),
        (getter, node1.attributes[1].name), ">", (getter, node1.contents),
        "</", (getter, node1.closing_tag), ">", "<", (getter, node2.tag),
        "/>", "</", (getter, node3.tag), ">"]
        "<",
        (getter, node1.tag),
        (getter, node1.attributes[0].name),
        " = ",
        marker,
        (getter, node1.attributes[0].value),
        (getter, node1.attributes[1].name),
        ">",
        (getter, node1.contents),
        "</",
        (getter, node1.closing_tag),
        ">",
        "<",
        (getter, node2.tag),
        "/>",
        "</",
        (getter, node3.tag),
        ">",
    ]
    assert valid == output


def test_tag():
    """test getter/setter for the tag attribute"""
    tag = wraptext("ref")
@@ -137,6 +158,7 @@ def test_tag():
    assert_wikicode_equal(wraptext("span"), node.closing_tag)
    assert "<span>text</span>" == node


def test_contents():
    """test getter/setter for the contents attribute"""
    contents = wraptext("text")
@@ -147,6 +169,7 @@ def test_contents():
    assert_wikicode_equal(parsed, node.contents)
    assert "<ref>text and a {{template}}</ref>" == node


def test_attributes():
    """test getter for the attributes attribute"""
    attrs = [agen("name", "bar")]
@@ -155,6 +178,7 @@ def test_attributes():
    assert [] == node1.attributes
    assert attrs is node2.attributes


def test_wiki_markup():
    """test getter/setter for the wiki_markup attribute"""
    node = Tag(wraptext("i"), wraptext("italic text"))
@@ -166,6 +190,7 @@ def test_wiki_markup():
    assert node.wiki_markup is None
    assert "<i>italic text</i>" == node


def test_self_closing():
    """test getter/setter for the self_closing attribute"""
    node = Tag(wraptext("ref"), wraptext("foobar"))
@@ -177,6 +202,7 @@ def test_self_closing():
    assert node.self_closing is False
    assert "<ref>foobar</ref>" == node


def test_invalid():
    """test getter/setter for the invalid attribute"""
    node = Tag(wraptext("br"), self_closing=True, implicit=True)
@@ -188,6 +214,7 @@ def test_invalid():
    assert node.invalid is False
    assert "<br>" == node


def test_implicit():
    """test getter/setter for the implicit attribute"""
    node = Tag(wraptext("br"), self_closing=True)
@@ -199,6 +226,7 @@ def test_implicit():
    assert node.implicit is False
    assert "<br/>" == node


def test_padding():
    """test getter/setter for the padding attribute"""
    node = Tag(wraptext("ref"), wraptext("foobar"))
@@ -212,6 +240,7 @@ def test_padding():
    with pytest.raises(ValueError):
        node.__setattr__("padding", True)


def test_closing_tag():
    """test getter/setter for the closing_tag attribute"""
    tag = wraptext("ref")
@@ -222,6 +251,7 @@ def test_closing_tag():
    assert_wikicode_equal(parsed, node.closing_tag)
    assert "<ref>foobar</ref {{ignore me}}>" == node


def test_wiki_style_separator():
    """test getter/setter for wiki_style_separator attribute"""
    node = Tag(wraptext("table"), wraptext("\n"))
@@ -233,6 +263,7 @@ def test_wiki_style_separator():
    node2 = Tag(wraptext("table"), wraptext("\n"), wiki_style_separator="|")
    assert "|" == node2.wiki_style_separator


def test_closing_wiki_markup():
    """test getter/setter for closing_wiki_markup attribute"""
    node = Tag(wraptext("table"), wraptext("\n"))
@@ -248,12 +279,17 @@ def test_closing_wiki_markup():
    node.wiki_markup = False
    assert node.closing_wiki_markup is None
    assert "<table>\n</table>" == node
    node2 = Tag(wraptext("table"), wraptext("\n"),
                attrs=[agen("id", "foo")], wiki_markup="{|",
                closing_wiki_markup="|}")
    node2 = Tag(
        wraptext("table"),
        wraptext("\n"),
        attrs=[agen("id", "foo")],
        wiki_markup="{|",
        closing_wiki_markup="|}",
    )
    assert "|}" == node2.closing_wiki_markup
    assert '{| id="foo"\n|}' == node2


def test_has():
    """test Tag.has()"""
    node = Tag(wraptext("ref"), wraptext("cite"), [agen("name", "foo")])
@@ -263,19 +299,26 @@ def test_has():
    assert node.has("Name") is False
    assert node.has("foo") is False

    attrs = [agen("id", "foo"), agenp("class", "bar", " ", "\n", "\n"),
             agen("foo", "bar"), agenpnv("foo", " ", " \n ", " \t")]
    attrs = [
        agen("id", "foo"),
        agenp("class", "bar", " ", "\n", "\n"),
        agen("foo", "bar"),
        agenpnv("foo", " ", " \n ", " \t"),
    ]
    node2 = Tag(wraptext("div"), attrs=attrs, self_closing=True)
    assert node2.has("id") is True
    assert node2.has("class") is True
    assert node2.has(attrs[1].pad_first + str(attrs[1].name) +
                     attrs[1].pad_before_eq) is True
    assert (
        node2.has(attrs[1].pad_first + str(attrs[1].name) + attrs[1].pad_before_eq)
        is True
    )
    assert node2.has(attrs[3]) is True
    assert node2.has(str(attrs[3])) is True
    assert node2.has("idclass") is False
    assert node2.has("id class") is False
    assert node2.has("id=foo") is False


def test_get():
    """test Tag.get()"""
    attrs = [agen("name", "foo")]
@@ -288,13 +331,18 @@ def test_get():
    with pytest.raises(ValueError):
        node.get("foo")

    attrs = [agen("id", "foo"), agenp("class", "bar", " ", "\n", "\n"),
             agen("foo", "bar"), agenpnv("foo", " ", " \n ", " \t")]
    attrs = [
        agen("id", "foo"),
        agenp("class", "bar", " ", "\n", "\n"),
        agen("foo", "bar"),
        agenpnv("foo", " ", " \n ", " \t"),
    ]
    node2 = Tag(wraptext("div"), attrs=attrs, self_closing=True)
    assert attrs[0] is node2.get("id")
    assert attrs[1] is node2.get("class")
    assert attrs[1] is node2.get(
        attrs[1].pad_first + str(attrs[1].name) + attrs[1].pad_before_eq)
        attrs[1].pad_first + str(attrs[1].name) + attrs[1].pad_before_eq
    )
    assert attrs[3] is node2.get(attrs[3])
    assert attrs[3] is node2.get(str(attrs[3]))
    assert attrs[3] is node2.get(" foo")
@@ -305,6 +353,7 @@ def test_get():
    with pytest.raises(ValueError):
        node2.get("id=foo")


def test_add():
    """test Tag.add()"""
    node = Tag(wraptext("ref"), wraptext("cite"))
@@ -330,19 +379,29 @@ def test_add():
assert attr6 == node.attributes[5] | assert attr6 == node.attributes[5] | ||||
assert attr7 == node.attributes[6] | assert attr7 == node.attributes[6] | ||||
assert attr7 == node.get("name") | assert attr7 == node.get("name") | ||||
assert_wikicode_equal(wrap([Template(wraptext("foobar"))]), | |||||
node.attributes[5].value) | |||||
assert "".join(("<ref", attr1, attr2, attr3, attr4, attr5, | |||||
attr6, attr7, ">cite</ref>")) == node | |||||
assert_wikicode_equal( | |||||
wrap([Template(wraptext("foobar"))]), node.attributes[5].value | |||||
) | |||||
assert ( | |||||
"".join( | |||||
("<ref", attr1, attr2, attr3, attr4, attr5, attr6, attr7, ">cite</ref>") | |||||
) | |||||
== node | |||||
) | |||||
with pytest.raises(ValueError): | with pytest.raises(ValueError): | ||||
node.add("name", "foo", quotes="bar") | node.add("name", "foo", quotes="bar") | ||||
with pytest.raises(ValueError): | with pytest.raises(ValueError): | ||||
node.add("name", "a bc d", quotes=None) | node.add("name", "a bc d", quotes=None) | ||||
def test_remove(): | def test_remove(): | ||||
"""test Tag.remove()""" | """test Tag.remove()""" | ||||
attrs = [agen("id", "foo"), agenp("class", "bar", " ", "\n", "\n"), | |||||
agen("foo", "bar"), agenpnv("foo", " ", " \n ", " \t")] | |||||
attrs = [ | |||||
agen("id", "foo"), | |||||
agenp("class", "bar", " ", "\n", "\n"), | |||||
agen("foo", "bar"), | |||||
agenpnv("foo", " ", " \n ", " \t"), | |||||
] | |||||
node = Tag(wraptext("div"), attrs=attrs, self_closing=True) | node = Tag(wraptext("div"), attrs=attrs, self_closing=True) | ||||
node.remove("class") | node.remove("class") | ||||
assert '<div id="foo" foo="bar" foo \n />' == node | assert '<div id="foo" foo="bar" foo \n />' == node | ||||
@@ -351,4 +410,4 @@ def test_remove(): | |||||
with pytest.raises(ValueError): | with pytest.raises(ValueError): | ||||
node.remove("foo") | node.remove("foo") | ||||
node.remove("id") | node.remove("id") | ||||
assert '<div/>' == node | |||||
assert "<div/>" == node |
@@ -34,19 +34,19 @@ from .conftest import assert_wikicode_equal, wrap, wraptext
 
 pgens = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=True)
 pgenh = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=False)
 
+
 def test_str():
     """test Template.__str__()"""
     node = Template(wraptext("foobar"))
     assert "{{foobar}}" == str(node)
-    node2 = Template(wraptext("foo"),
-                     [pgenh("1", "bar"), pgens("abc", "def")])
+    node2 = Template(wraptext("foo"), [pgenh("1", "bar"), pgens("abc", "def")])
     assert "{{foo|bar|abc=def}}" == str(node2)
 
+
 def test_children():
     """test Template.__children__()"""
     node2p1 = Parameter(wraptext("1"), wraptext("bar"), showkey=False)
-    node2p2 = Parameter(wraptext("abc"), wrap([Text("def"), Text("ghi")]),
-                        showkey=True)
+    node2p2 = Parameter(wraptext("abc"), wrap([Text("def"), Text("ghi")]), showkey=True)
     node1 = Template(wraptext("foobar"))
     node2 = Template(wraptext("foo"), [node2p1, node2p2])
@@ -62,16 +62,23 @@ def test_children():
     with pytest.raises(StopIteration):
         next(gen2)
 
+
 def test_strip():
     """test Template.__strip__()"""
     node1 = Template(wraptext("foobar"))
-    node2 = Template(wraptext("foo"), [
-        pgenh("1", "bar"), pgens("foo", ""), pgens("abc", "def")])
-    node3 = Template(wraptext("foo"), [
-        pgenh("1", "foo"),
-        Parameter(wraptext("2"), wrap([Template(wraptext("hello"))]),
-                  showkey=False),
-        pgenh("3", "bar")])
+    node2 = Template(
+        wraptext("foo"), [pgenh("1", "bar"), pgens("foo", ""), pgens("abc", "def")]
+    )
+    node3 = Template(
+        wraptext("foo"),
+        [
+            pgenh("1", "foo"),
+            Parameter(
+                wraptext("2"), wrap([Template(wraptext("hello"))]), showkey=False
+            ),
+            pgenh("3", "bar"),
+        ],
+    )
 
     assert node1.__strip__(keep_template_params=False) is None
     assert node2.__strip__(keep_template_params=False) is None
@@ -79,6 +86,7 @@ def test_strip():
     assert "bar def" == node2.__strip__(keep_template_params=True)
     assert "foo bar" == node3.__strip__(keep_template_params=True)
 
+
 def test_showtree():
     """test Template.__showtree__()"""
     output = []
@@ -86,18 +94,32 @@ def test_showtree():
     get = lambda code: output.append((getter, code))
     mark = lambda: output.append(marker)
     node1 = Template(wraptext("foobar"))
-    node2 = Template(wraptext("foo"),
-                     [pgenh("1", "bar"), pgens("abc", "def")])
+    node2 = Template(wraptext("foo"), [pgenh("1", "bar"), pgens("abc", "def")])
     node1.__showtree__(output.append, get, mark)
     node2.__showtree__(output.append, get, mark)
     valid = [
-        "{{", (getter, node1.name), "}}", "{{", (getter, node2.name),
-        " | ", marker, (getter, node2.params[0].name), " = ", marker,
-        (getter, node2.params[0].value), " | ", marker,
-        (getter, node2.params[1].name), " = ", marker,
-        (getter, node2.params[1].value), "}}"]
+        "{{",
+        (getter, node1.name),
+        "}}",
+        "{{",
+        (getter, node2.name),
+        " | ",
+        marker,
+        (getter, node2.params[0].name),
+        " = ",
+        marker,
+        (getter, node2.params[0].value),
+        " | ",
+        marker,
+        (getter, node2.params[1].name),
+        " = ",
+        marker,
+        (getter, node2.params[1].value),
+        "}}",
+    ]
     assert valid == output
 
+
 def test_name():
     """test getter/setter for the name attribute"""
     name = wraptext("foobar")
@@ -110,6 +132,7 @@ def test_name():
     assert_wikicode_equal(wraptext("asdf"), node1.name)
     assert_wikicode_equal(wraptext("téstïng"), node2.name)
 
+
 def test_params():
     """test getter for the params attribute"""
     node1 = Template(wraptext("foobar"))
@@ -118,13 +141,14 @@ def test_params():
     assert [] == node1.params
     assert plist is node2.params
 
+
 def test_has():
     """test Template.has()"""
     node1 = Template(wraptext("foobar"))
-    node2 = Template(wraptext("foo"),
-                     [pgenh("1", "bar"), pgens("\nabc ", "def")])
-    node3 = Template(wraptext("foo"),
-                     [pgenh("1", "a"), pgens("b", "c"), pgens("1", "d")])
+    node2 = Template(wraptext("foo"), [pgenh("1", "bar"), pgens("\nabc ", "def")])
+    node3 = Template(
+        wraptext("foo"), [pgenh("1", "a"), pgens("b", "c"), pgens("1", "d")]
+    )
     node4 = Template(wraptext("foo"), [pgenh("1", "a"), pgens("b", " ")])
     assert node1.has("foobar", False) is False
     assert node2.has(1, False) is True
@@ -138,6 +162,7 @@ def test_has():
     assert node1.has_param("foobar", False) is False
     assert node2.has_param(1, False) is True
 
+
 def test_get():
     """test Template.get()"""
     node1 = Template(wraptext("foobar"))
@@ -159,16 +184,15 @@ def test_get():
     assert node3p2 is node3.get("1")
     assert node4p1 is node4.get("b ")
 
+
 def test_add():
     """test Template.add()"""
     node1 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")])
     node2 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")])
     node3 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")])
     node4 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")])
-    node5 = Template(wraptext("a"), [pgens("b", "c"),
-                                     pgens(" d ", "e")])
-    node6 = Template(wraptext("a"), [pgens("b", "c"), pgens("b", "d"),
-                                     pgens("b", "e")])
+    node5 = Template(wraptext("a"), [pgens("b", "c"), pgens(" d ", "e")])
+    node6 = Template(wraptext("a"), [pgens("b", "c"), pgens("b", "d"), pgens("b", "e")])
     node7 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")])
     node8p = pgenh("1", "d")
     node8 = Template(wraptext("a"), [pgens("b", "c"), node8p])
@@ -176,48 +200,87 @@ def test_add():
     node10 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "e")])
     node11 = Template(wraptext("a"), [pgens("b", "c")])
     node12 = Template(wraptext("a"), [pgens("b", "c")])
-    node13 = Template(wraptext("a"), [
-        pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")])
-    node14 = Template(wraptext("a\n"), [
-        pgens("b ", "c\n"), pgens("d ", " e"), pgens("f ", "g\n"),
-        pgens("h ", " i\n")])
-    node15 = Template(wraptext("a"), [
-        pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")])
-    node16 = Template(wraptext("a"), [
-        pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")])
+    node13 = Template(
+        wraptext("a"), [pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]
+    )
+    node14 = Template(
+        wraptext("a\n"),
+        [
+            pgens("b ", "c\n"),
+            pgens("d ", " e"),
+            pgens("f ", "g\n"),
+            pgens("h ", " i\n"),
+        ],
+    )
+    node15 = Template(
+        wraptext("a"),
+        [pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")],
+    )
+    node16 = Template(
+        wraptext("a"), [pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]
+    )
     node17 = Template(wraptext("a"), [pgenh("1", "b")])
     node18 = Template(wraptext("a"), [pgenh("1", "b")])
     node19 = Template(wraptext("a"), [pgenh("1", "b")])
-    node20 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"),
-                                      pgenh("3", "d"), pgenh("4", "e")])
-    node21 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"),
-                                      pgens("4", "d"), pgens("5", "e")])
-    node22 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"),
-                                      pgens("4", "d"), pgens("5", "e")])
+    node20 = Template(
+        wraptext("a"),
+        [pgenh("1", "b"), pgenh("2", "c"), pgenh("3", "d"), pgenh("4", "e")],
+    )
+    node21 = Template(
+        wraptext("a"),
+        [pgenh("1", "b"), pgenh("2", "c"), pgens("4", "d"), pgens("5", "e")],
+    )
+    node22 = Template(
+        wraptext("a"),
+        [pgenh("1", "b"), pgenh("2", "c"), pgens("4", "d"), pgens("5", "e")],
+    )
     node23 = Template(wraptext("a"), [pgenh("1", "b")])
     node24 = Template(wraptext("a"), [pgenh("1", "b")])
     node25 = Template(wraptext("a"), [pgens("b", "c")])
     node26 = Template(wraptext("a"), [pgenh("1", "b")])
     node27 = Template(wraptext("a"), [pgenh("1", "b")])
     node28 = Template(wraptext("a"), [pgens("1", "b")])
-    node29 = Template(wraptext("a"), [
-        pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")])
-    node30 = Template(wraptext("a\n"), [
-        pgens("b ", "c\n"), pgens("d ", " e"), pgens("f ", "g\n"),
-        pgens("h ", " i\n")])
-    node31 = Template(wraptext("a"), [
-        pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")])
-    node32 = Template(wraptext("a"), [
-        pgens("\nb ", " c "), pgens("\nd ", " e "), pgens("\nf ", " g ")])
-    node33 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"),
-                                      pgens("b", "f"), pgens("b", "h"),
-                                      pgens("i", "j")])
-    node34 = Template(wraptext("a"), [pgens("1", "b"), pgens("x", "y"),
-                                      pgens("1", "c"), pgens("2", "d")])
-    node35 = Template(wraptext("a"), [pgens("1", "b"), pgens("x", "y"),
-                                      pgenh("1", "c"), pgenh("2", "d")])
-    node36 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"),
-                                      pgens("f", "g")])
+    node29 = Template(
+        wraptext("a"), [pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]
+    )
+    node30 = Template(
+        wraptext("a\n"),
+        [
+            pgens("b ", "c\n"),
+            pgens("d ", " e"),
+            pgens("f ", "g\n"),
+            pgens("h ", " i\n"),
+        ],
+    )
+    node31 = Template(
+        wraptext("a"),
+        [pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")],
+    )
+    node32 = Template(
+        wraptext("a"),
+        [pgens("\nb ", " c "), pgens("\nd ", " e "), pgens("\nf ", " g ")],
+    )
+    node33 = Template(
+        wraptext("a"),
+        [
+            pgens("b", "c"),
+            pgens("d", "e"),
+            pgens("b", "f"),
+            pgens("b", "h"),
+            pgens("i", "j"),
+        ],
+    )
+    node34 = Template(
+        wraptext("a"),
+        [pgens("1", "b"), pgens("x", "y"), pgens("1", "c"), pgens("2", "d")],
+    )
+    node35 = Template(
+        wraptext("a"),
+        [pgens("1", "b"), pgens("x", "y"), pgenh("1", "c"), pgenh("2", "d")],
+    )
+    node36 = Template(
+        wraptext("a"), [pgens("b", "c"), pgens("d", "e"), pgens("f", "g")]
+    )
     node37 = Template(wraptext("a"), [pgenh("1", "")])
     node38 = Template(wraptext("abc"))
     node39 = Template(wraptext("a"), [pgenh("1", " b ")])
@@ -320,65 +383,121 @@ def test_add():
     assert "{{a|1= b|2= c|3= d}}" == node41
     assert "{{a|b=hello \n}}" == node42
 
+
 def test_remove():
     """test Template.remove()"""
     node1 = Template(wraptext("foobar"))
-    node2 = Template(wraptext("foo"),
-                     [pgenh("1", "bar"), pgens("abc", "def")])
-    node3 = Template(wraptext("foo"),
-                     [pgenh("1", "bar"), pgens("abc", "def")])
-    node4 = Template(wraptext("foo"),
-                     [pgenh("1", "bar"), pgenh("2", "baz")])
-    node5 = Template(wraptext("foo"), [
-        pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")])
-    node6 = Template(wraptext("foo"), [
-        pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")])
-    node7 = Template(wraptext("foo"), [
-        pgens("1 ", "a"), pgens(" 1", "b"), pgens("2", "c")])
-    node8 = Template(wraptext("foo"), [
-        pgens("1 ", "a"), pgens(" 1", "b"), pgens("2", "c")])
-    node9 = Template(wraptext("foo"), [
-        pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")])
-    node10 = Template(wraptext("foo"), [
-        pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")])
-    node11 = Template(wraptext("foo"), [
-        pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")])
-    node12 = Template(wraptext("foo"), [
-        pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")])
-    node13 = Template(wraptext("foo"), [
-        pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")])
-    node14 = Template(wraptext("foo"), [
-        pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")])
-    node15 = Template(wraptext("foo"), [
-        pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")])
-    node16 = Template(wraptext("foo"), [
-        pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")])
-    node17 = Template(wraptext("foo"), [
-        pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")])
-    node18 = Template(wraptext("foo"), [
-        pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")])
-    node19 = Template(wraptext("foo"), [
-        pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")])
-    node20 = Template(wraptext("foo"), [
-        pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")])
-    node21 = Template(wraptext("foo"), [
-        pgens("a", "b"), pgens("c", "d"), pgens("e", "f"), pgens("a", "b"),
-        pgens("a", "b")])
-    node22 = Template(wraptext("foo"), [
-        pgens("a", "b"), pgens("c", "d"), pgens("e", "f"), pgens("a", "b"),
-        pgens("a", "b")])
-    node23 = Template(wraptext("foo"), [
-        pgens("a", "b"), pgens("c", "d"), pgens("e", "f"), pgens("a", "b"),
-        pgens("a", "b")])
-    node24 = Template(wraptext("foo"), [
-        pgens("a", "b"), pgens("c", "d"), pgens("e", "f"), pgens("a", "b"),
-        pgens("a", "b")])
-    node25 = Template(wraptext("foo"), [
-        pgens("a", "b"), pgens("c", "d"), pgens("e", "f"), pgens("a", "b"),
-        pgens("a", "b")])
-    node26 = Template(wraptext("foo"), [
-        pgens("a", "b"), pgens("c", "d"), pgens("e", "f"), pgens("a", "b"),
-        pgens("a", "b")])
+    node2 = Template(wraptext("foo"), [pgenh("1", "bar"), pgens("abc", "def")])
+    node3 = Template(wraptext("foo"), [pgenh("1", "bar"), pgens("abc", "def")])
+    node4 = Template(wraptext("foo"), [pgenh("1", "bar"), pgenh("2", "baz")])
+    node5 = Template(
+        wraptext("foo"), [pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]
+    )
+    node6 = Template(
+        wraptext("foo"), [pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]
+    )
+    node7 = Template(
+        wraptext("foo"), [pgens("1 ", "a"), pgens(" 1", "b"), pgens("2", "c")]
+    )
+    node8 = Template(
+        wraptext("foo"), [pgens("1 ", "a"), pgens(" 1", "b"), pgens("2", "c")]
+    )
+    node9 = Template(
+        wraptext("foo"), [pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")]
+    )
+    node10 = Template(
+        wraptext("foo"), [pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")]
+    )
+    node11 = Template(
+        wraptext("foo"), [pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]
+    )
+    node12 = Template(
+        wraptext("foo"), [pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]
+    )
+    node13 = Template(
+        wraptext("foo"), [pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]
+    )
+    node14 = Template(
+        wraptext("foo"), [pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]
+    )
+    node15 = Template(
+        wraptext("foo"), [pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]
+    )
+    node16 = Template(
+        wraptext("foo"), [pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]
+    )
+    node17 = Template(
+        wraptext("foo"), [pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")]
+    )
+    node18 = Template(
+        wraptext("foo"), [pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")]
+    )
+    node19 = Template(
+        wraptext("foo"), [pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")]
+    )
+    node20 = Template(
+        wraptext("foo"), [pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")]
+    )
+    node21 = Template(
+        wraptext("foo"),
+        [
+            pgens("a", "b"),
+            pgens("c", "d"),
+            pgens("e", "f"),
+            pgens("a", "b"),
+            pgens("a", "b"),
+        ],
+    )
+    node22 = Template(
+        wraptext("foo"),
+        [
+            pgens("a", "b"),
+            pgens("c", "d"),
+            pgens("e", "f"),
+            pgens("a", "b"),
+            pgens("a", "b"),
+        ],
+    )
+    node23 = Template(
+        wraptext("foo"),
+        [
+            pgens("a", "b"),
+            pgens("c", "d"),
+            pgens("e", "f"),
+            pgens("a", "b"),
+            pgens("a", "b"),
+        ],
+    )
+    node24 = Template(
+        wraptext("foo"),
+        [
+            pgens("a", "b"),
+            pgens("c", "d"),
+            pgens("e", "f"),
+            pgens("a", "b"),
+            pgens("a", "b"),
+        ],
+    )
+    node25 = Template(
+        wraptext("foo"),
+        [
+            pgens("a", "b"),
+            pgens("c", "d"),
+            pgens("e", "f"),
+            pgens("a", "b"),
+            pgens("a", "b"),
+        ],
+    )
+    node26 = Template(
+        wraptext("foo"),
+        [
+            pgens("a", "b"),
+            pgens("c", "d"),
+            pgens("e", "f"),
+            pgens("a", "b"),
+            pgens("a", "b"),
+        ],
+    )
     node27 = Template(wraptext("foo"), [pgenh("1", "bar")])
     node28 = Template(wraptext("foo"), [pgenh("1", "bar")])
 
@@ -444,12 +563,14 @@ def test_remove():
     with pytest.raises(ValueError):
         node27.remove(node28.get(1))
 
+
 def test_formatting():
     """test realistic param manipulation with complex whitespace formatting
     (assumes that parsing works correctly)"""
     tests = [
-        # https://en.wikipedia.org/w/index.php?title=Lamar_County,_Georgia&oldid=792356004
-        ("""{{Infobox U.S. county
+        # https://en.wikipedia.org/w/index.php?title=Lamar_County,_Georgia&oldid=792356004
+        (
+            """{{Infobox U.S. county
 | county = Lamar County
 | state = Georgia
 | seal =
@@ -471,16 +592,17 @@ def test_formatting():
 | district = 3rd
 | named for = [[Lucius Quintus Cincinnatus Lamar II]]
 }}""",
-         """@@ -11,4 +11,4 @@
+            """@@ -11,4 +11,4 @@
  | area percentage = 1.3%
 -| census yr = 2010
 -| pop = 18317
 +| census estimate yr = 2016
 +| pop = 12345<ref>example ref</ref>
- | density_sq_mi = 100"""),
-
-        # https://en.wikipedia.org/w/index.php?title=Rockdale_County,_Georgia&oldid=792359760
-        ("""{{Infobox U.S. County|
+ | density_sq_mi = 100""",
+        ),
+        # https://en.wikipedia.org/w/index.php?title=Rockdale_County,_Georgia&oldid=792359760
+        (
+            """{{Infobox U.S. County|
 county = Rockdale County |
 state = Georgia |
 seal = |
@@ -500,16 +622,17 @@ def test_formatting():
 | district = 4th
 | time zone= Eastern
 }}""",
-         """@@ -11,4 +11,4 @@
+            """@@ -11,4 +11,4 @@
  area percentage = 1.7% |
 - census yr = 2010|
 - pop = 85215 |
 + census estimate yr = 2016 |
 + pop = 12345<ref>example ref</ref> |
- density_sq_mi = 657 |"""),
-
-        # https://en.wikipedia.org/w/index.php?title=Spalding_County,_Georgia&oldid=792360413
-        ("""{{Infobox U.S. County|
+ density_sq_mi = 657 |""",
+        ),
+        # https://en.wikipedia.org/w/index.php?title=Spalding_County,_Georgia&oldid=792360413
+        (
+            """{{Infobox U.S. County|
 | county = Spalding County |
 | state = Georgia |
 | seal = |
@@ -530,16 +653,17 @@ def test_formatting():
 | district = 3rd
 | time zone = Eastern
 }}""",
-         """@@ -11,4 +11,4 @@
+            """@@ -11,4 +11,4 @@
  | area percentage = 1.6% |
 -| census yr = 2010|
 -| pop = 64073 |
 +|
 +| census estimate yr = 2016 | pop = 12345<ref>example ref</ref> |
- | density_sq_mi = 326 |"""),
-
-        # https://en.wikipedia.org/w/index.php?title=Clinton_County,_Illinois&oldid=794694648
-        ("""{{Infobox U.S. county
+ | density_sq_mi = 326 |""",
+        ),
+        # https://en.wikipedia.org/w/index.php?title=Clinton_County,_Illinois&oldid=794694648
+        (
+            """{{Infobox U.S. county
 |county = Clinton County
 |state = Illinois
 | ex image = File:Clinton County Courthouse, Carlyle.jpg
@@ -560,16 +684,17 @@ def test_formatting():
 |web = www.clintonco.illinois.gov
 | district = 15th
 }}""",
-         """@@ -15,4 +15,4 @@
+            """@@ -15,4 +15,4 @@
  |area percentage = 5.8%
 - |census yr = 2010
 - |pop = 37762
 + |census estimate yr = 2016
 + |pop = 12345<ref>example ref</ref>
- |density_sq_mi = 80"""),
-
-        # https://en.wikipedia.org/w/index.php?title=Winnebago_County,_Illinois&oldid=789193800
-        ("""{{Infobox U.S. county |
+ |density_sq_mi = 80""",
+        ),
+        # https://en.wikipedia.org/w/index.php?title=Winnebago_County,_Illinois&oldid=789193800
+        (
+            """{{Infobox U.S. county |
 county = Winnebago County |
 state = Illinois |
 seal = Winnebago County il seal.png |
@@ -590,19 +715,21 @@ def test_formatting():
 | district = 16th
 | district2 = 17th
 }}""",
-         """@@ -11,4 +11,4 @@
+            """@@ -11,4 +11,4 @@
  area percentage = 1.1% |
 - census yr = 2010|
 - pop = 295266 |
 + census estimate yr = 2016|
 + pop = 12345<ref>example ref</ref> |
- density_sq_mi = 575""")]
+ density_sq_mi = 575""",
+        ),
+    ]
 
     for (original, expected) in tests:
         code = parse(original)
         template = code.filter_templates()[0]
         template.add("pop", "12345<ref>example ref</ref>")
-        template.add('census estimate yr', "2016", before="pop")
+        template.add("census estimate yr", "2016", before="pop")
         template.remove("census yr")
 
         oldlines = original.splitlines(True)
@@ -26,6 +26,7 @@ import pytest
 
 from mwparserfromhell.nodes import Text
 
+
 def test_str():
     """test Text.__str__()"""
     node = Text("foobar")
@@ -33,6 +34,7 @@ def test_str():
     node2 = Text("fóóbar")
     assert "fóóbar" == str(node2)
 
+
 def test_children():
     """test Text.__children__()"""
     node = Text("foobar")
@@ -40,11 +42,13 @@ def test_children():
     with pytest.raises(StopIteration):
         next(gen)
 
+
 def test_strip():
     """test Text.__strip__()"""
     node = Text("foobar")
     assert node is node.__strip__()
 
+
 def test_showtree():
     """test Text.__showtree__()"""
     output = []
@@ -57,6 +61,7 @@ def test_showtree():
     res = ["foobar", r"f\xf3\xf3bar", "\\U00010332\\U0001033f\\U00010344"]
     assert res == output
 
+
 def test_value():
     """test getter/setter for the value attribute"""
     node = Text("foobar")
@@ -33,29 +33,32 @@ try:
 except ImportError:
     CTokenizer = None
 
+
 class _TestParseError(Exception):
     """Raised internally when a test could not be parsed."""
 
+
 def _parse_test(test, data):
     """Parse an individual *test*, storing its info in *data*."""
     for line in test.strip().splitlines():
         if line.startswith("name:"):
-            data["name"] = line[len("name:"):].strip()
+            data["name"] = line[len("name:") :].strip()
         elif line.startswith("label:"):
-            data["label"] = line[len("label:"):].strip()
+            data["label"] = line[len("label:") :].strip()
         elif line.startswith("input:"):
-            raw = line[len("input:"):].strip()
+            raw = line[len("input:") :].strip()
             if raw[0] == '"' and raw[-1] == '"':
                 raw = raw[1:-1]
             raw = raw.encode("raw_unicode_escape")
             data["input"] = raw.decode("unicode_escape")
         elif line.startswith("output:"):
-            raw = line[len("output:"):].strip()
+            raw = line[len("output:") :].strip()
             try:
                 data["output"] = eval(raw, vars(tokens))
             except Exception as err:
                 raise _TestParseError(err) from err
 
+
 def _load_tests(filename, name, text):
     """Load all tests in *text* from the file *filename*."""
     tests = text.split("\n---\n")
@@ -77,15 +80,18 @@ def _load_tests(filename, name, text):
             warnings.warn(error.format(filename))
             continue
 
         if data["input"] is None or data["output"] is None:
-            error = "Test '{}' in '{}' was ignored because it lacked an input or an output"
+            error = (
+                "Test '{}' in '{}' was ignored because it lacked an input or an output"
+            )
             warnings.warn(error.format(data["name"], filename))
             continue
 
         # Include test filename in name
-        data['name'] = '{}:{}'.format(name, data['name'])
+        data["name"] = "{}:{}".format(name, data["name"])
         yield data
 
+
 def build():
     """Load and install all tests from the 'tokenizer' directory."""
     directory = path.join(path.dirname(__file__), "tokenizer")
@@ -96,31 +102,37 @@ def build():
         fullname = path.join(directory, filename)
         with codecs.open(fullname, "r", encoding="utf8") as fp:
             text = fp.read()
-            name = path.split(fullname)[1][:-len(extension)]
+            name = path.split(fullname)[1][: -len(extension)]
             yield from _load_tests(fullname, name, text)
 
-@pytest.mark.parametrize("tokenizer", filter(None, (
-    CTokenizer, PyTokenizer
-)), ids=lambda t: 'CTokenizer' if t.USES_C else 'PyTokenizer')
-@pytest.mark.parametrize("data", build(), ids=lambda data: data['name'])
+
+@pytest.mark.parametrize(
+    "tokenizer",
+    filter(None, (CTokenizer, PyTokenizer)),
+    ids=lambda t: "CTokenizer" if t.USES_C else "PyTokenizer",
+)
+@pytest.mark.parametrize("data", build(), ids=lambda data: data["name"])
 def test_tokenizer(tokenizer, data):
     expected = data["output"]
     actual = tokenizer().tokenize(data["input"])
     assert expected == actual
 
-@pytest.mark.parametrize("data", build(), ids=lambda data: data['name'])
+
+@pytest.mark.parametrize("data", build(), ids=lambda data: data["name"])
def test_roundtrip(data):
     expected = data["input"]
     actual = str(Builder().build(data["output"][:]))
     assert expected == actual
 
-@pytest.mark.skipif(CTokenizer is None, reason='CTokenizer not available')
+
+@pytest.mark.skipif(CTokenizer is None, reason="CTokenizer not available")
 def test_c_tokenizer_uses_c():
     """make sure the C tokenizer identifies as using a C extension"""
     assert CTokenizer.USES_C is True
     assert CTokenizer().USES_C is True
 
+
 def test_describe_context():
     assert "" == contexts.describe(0)
-    ctx = contexts.describe(contexts.TEMPLATE_PARAM_KEY|contexts.HAS_TEXT)
+    ctx = contexts.describe(contexts.TEMPLATE_PARAM_KEY | contexts.HAS_TEXT)
     assert "TEMPLATE_PARAM_KEY|HAS_TEXT" == ctx
@@ -26,6 +26,7 @@ import pytest
 
 from mwparserfromhell.parser import tokens
 
+
 @pytest.mark.parametrize("name", tokens.__all__)
 def test_issubclass(name):
     """check that all classes within the tokens module are really Tokens"""
@@ -34,6 +35,7 @@ def test_issubclass(name):
     assert isinstance(klass(), klass)
     assert isinstance(klass(), tokens.Token)
 
+
 def test_attributes():
     """check that Token attributes can be managed properly"""
     token1 = tokens.Token()
@@ -54,6 +56,7 @@ def test_attributes():
     with pytest.raises(KeyError):
         token2.__delattr__("baz")
 
+
 def test_repr():
     """check that repr() on a Token works as expected"""
     token1 = tokens.Token()
@@ -65,6 +68,7 @@ def test_repr():
     assert repr(token2) in ("Token(foo='bar', baz=123)", "Token(baz=123, foo='bar')")
     assert "Text(text='" + hundredchars + "')" == repr(token3)
 
+
 def test_equality():
     """check that equivalent tokens are considered equal"""
     token1 = tokens.Token()
@@ -83,11 +87,11 @@ def test_equality():
     assert token4 != token6
     assert token5 != token6
 
-@pytest.mark.parametrize("token", [
-    tokens.Token(),
-    tokens.Token(foo="bar", baz=123),
-    tokens.Text(text="earwig")
-])
+
+@pytest.mark.parametrize(
+    "token",
+    [tokens.Token(), tokens.Token(foo="bar", baz=123), tokens.Text(text="earwig")],
+)
 def test_repr_equality(token):
     """check that eval(repr(token)) == token"""
     assert token == eval(repr(token), vars(tokens))
@@ -28,28 +28,33 @@ from mwparserfromhell.nodes import Template, Text
 from mwparserfromhell.utils import parse_anything
 from .conftest import assert_wikicode_equal, wrap, wraptext
 
-@pytest.mark.parametrize("test,valid", [
-    (wraptext("foobar"), wraptext("foobar")),
-    (Template(wraptext("spam")), wrap([Template(wraptext("spam"))])),
-    ("fóóbar", wraptext("fóóbar")),
-    (b"foob\xc3\xa1r", wraptext("foobár")),
-    (123, wraptext("123")),
-    (True, wraptext("True")),
-    (None, wrap([])),
-    ([Text("foo"), Text("bar"), Text("baz")],
-     wraptext("foo", "bar", "baz")),
-    ([wraptext("foo"), Text("bar"), "baz", 123, 456],
-     wraptext("foo", "bar", "baz", "123", "456")),
-    ([[[([[((("foo",),),)], "bar"],)]]], wraptext("foo", "bar"))
-])
+
+@pytest.mark.parametrize(
+    "test,valid",
+    [
+        (wraptext("foobar"), wraptext("foobar")),
+        (Template(wraptext("spam")), wrap([Template(wraptext("spam"))])),
+        ("fóóbar", wraptext("fóóbar")),
+        (b"foob\xc3\xa1r", wraptext("foobár")),
+        (123, wraptext("123")),
+        (True, wraptext("True")),
+        (None, wrap([])),
+        ([Text("foo"), Text("bar"), Text("baz")], wraptext("foo", "bar", "baz")),
+        (
+            [wraptext("foo"), Text("bar"), "baz", 123, 456],
+            wraptext("foo", "bar", "baz", "123", "456"),
+        ),
+        ([[[([[((("foo",),),)], "bar"],)]]], wraptext("foo", "bar")),
+    ],
+)
 def test_parse_anything_valid(test, valid):
     """tests for valid input to utils.parse_anything()"""
     assert_wikicode_equal(valid, parse_anything(test))
 
-@pytest.mark.parametrize("invalid", [
-    Ellipsis, object, object(), type,
-    ["foo", [object]]
-])
+
+@pytest.mark.parametrize(
+    "invalid", [Ellipsis, object, object(), type, ["foo", [object]]]
+)
 def test_parse_anything_invalid(invalid):
     """tests for invalid input to utils.parse_anything()"""
     with pytest.raises(ValueError):
@@ -34,6 +34,7 @@ from mwparserfromhell.wikicode import Wikicode | |||||
from mwparserfromhell import parse | from mwparserfromhell import parse | ||||
from .conftest import wrap, wraptext | from .conftest import wrap, wraptext | ||||
def test_str(): | def test_str(): | ||||
"""test Wikicode.__str__()""" | """test Wikicode.__str__()""" | ||||
code1 = parse("foobar") | code1 = parse("foobar") | ||||
@@ -41,6 +42,7 @@ def test_str(): | |||||
assert "foobar" == str(code1) | assert "foobar" == str(code1) | ||||
assert "Have a {{template}} and a [[page|link]]" == str(code2) | assert "Have a {{template}} and a [[page|link]]" == str(code2) | ||||
def test_nodes(): | def test_nodes(): | ||||
"""test getter/setter for the nodes attribute""" | """test getter/setter for the nodes attribute""" | ||||
code = parse("Have a {{template}}") | code = parse("Have a {{template}}") | ||||
@@ -57,6 +59,7 @@ def test_nodes(): | |||||
with pytest.raises(ValueError): | with pytest.raises(ValueError): | ||||
code.__setattr__("nodes", object) | code.__setattr__("nodes", object) | ||||
def test_get(): | def test_get(): | ||||
"""test Wikicode.get()""" | """test Wikicode.get()""" | ||||
code = parse("Have a {{template}} and a [[page|link]]") | code = parse("Have a {{template}} and a [[page|link]]") | ||||
@@ -65,6 +68,7 @@ def test_get(): | |||||
with pytest.raises(IndexError): | with pytest.raises(IndexError): | ||||
code.get(4) | code.get(4) | ||||
def test_set(): | def test_set(): | ||||
"""test Wikicode.set()""" | """test Wikicode.set()""" | ||||
code = parse("Have a {{template}} and a [[page|link]]") | code = parse("Have a {{template}} and a [[page|link]]") | ||||
@@ -82,6 +86,7 @@ def test_set(): | |||||
with pytest.raises(IndexError): | with pytest.raises(IndexError): | ||||
code.set(-4, "{{baz}}") | code.set(-4, "{{baz}}") | ||||
def test_contains(): | def test_contains(): | ||||
"""test Wikicode.contains()""" | """test Wikicode.contains()""" | ||||
code = parse("Here is {{aaa|{{bbb|xyz{{ccc}}}}}} and a [[page|link]]") | code = parse("Here is {{aaa|{{bbb|xyz{{ccc}}}}}} and a [[page|link]]") | ||||
@@ -93,6 +98,7 @@ def test_contains(): | |||||
assert code.contains(str(tmpl4)) is True | assert code.contains(str(tmpl4)) is True | ||||
assert code.contains(tmpl2.params[0].value) is True | assert code.contains(tmpl2.params[0].value) is True | ||||
def test_index(): | def test_index(): | ||||
"""test Wikicode.index()""" | """test Wikicode.index()""" | ||||
code = parse("Have a {{template}} and a [[page|link]]") | code = parse("Have a {{template}} and a [[page|link]]") | ||||
@@ -105,13 +111,13 @@ def test_index(): | |||||
code = parse("{{foo}}{{bar|{{baz}}}}") | code = parse("{{foo}}{{bar|{{baz}}}}") | ||||
assert 1 == code.index("{{bar|{{baz}}}}") | assert 1 == code.index("{{bar|{{baz}}}}") | ||||
assert 1 == code.index("{{baz}}", recursive=True) | assert 1 == code.index("{{baz}}", recursive=True) | ||||
assert 1 == code.index(code.get(1).get(1).value, | |||||
recursive=True) | |||||
assert 1 == code.index(code.get(1).get(1).value, recursive=True) | |||||
with pytest.raises(ValueError): | with pytest.raises(ValueError): | ||||
code.index("{{baz}}", recursive=False) | code.index("{{baz}}", recursive=False) | ||||
with pytest.raises(ValueError): | with pytest.raises(ValueError): | ||||
code.index(code.get(1).get(1).value, recursive=False) | code.index(code.get(1).get(1).value, recursive=False) | ||||
def test_get_ancestors_parent(): | def test_get_ancestors_parent(): | ||||
"""test Wikicode.get_ancestors() and Wikicode.get_parent()""" | """test Wikicode.get_ancestors() and Wikicode.get_parent()""" | ||||
code = parse("{{a|{{b|{{d|{{e}}{{f}}}}{{g}}}}}}{{c}}") | code = parse("{{a|{{b|{{d|{{e}}{{f}}}}{{g}}}}}}{{c}}") | ||||
@@ -130,6 +136,7 @@ def test_get_ancestors_parent(): | |||||
with pytest.raises(ValueError): | with pytest.raises(ValueError): | ||||
code.get_parent(fake) | code.get_parent(fake) | ||||
def test_insert(): | def test_insert(): | ||||
"""test Wikicode.insert()""" | """test Wikicode.insert()""" | ||||
code = parse("Have a {{template}} and a [[page|link]]") | code = parse("Have a {{template}} and a [[page|link]]") | ||||
@@ -144,14 +151,22 @@ def test_insert(): | |||||
code2 = parse("{{foo}}{{bar}}{{baz}}") | code2 = parse("{{foo}}{{bar}}{{baz}}") | ||||
code2.insert(1, "abc{{def}}ghi[[jk]]") | code2.insert(1, "abc{{def}}ghi[[jk]]") | ||||
assert "{{foo}}abc{{def}}ghi[[jk]]{{bar}}{{baz}}" == code2 | assert "{{foo}}abc{{def}}ghi[[jk]]{{bar}}{{baz}}" == code2 | ||||
assert ["{{foo}}", "abc", "{{def}}", "ghi", "[[jk]]", | |||||
"{{bar}}", "{{baz}}"] == code2.nodes | |||||
assert [ | |||||
"{{foo}}", | |||||
"abc", | |||||
"{{def}}", | |||||
"ghi", | |||||
"[[jk]]", | |||||
"{{bar}}", | |||||
"{{baz}}", | |||||
] == code2.nodes | |||||
code3 = parse("{{foo}}bar") | code3 = parse("{{foo}}bar") | ||||
code3.insert(1000, "[[baz]]") | code3.insert(1000, "[[baz]]") | ||||
code3.insert(-1000, "derp") | code3.insert(-1000, "derp") | ||||
assert "derp{{foo}}bar[[baz]]" == code3 | assert "derp{{foo}}bar[[baz]]" == code3 | ||||
def _test_search(meth, expected): | def _test_search(meth, expected): | ||||
"""Base test for insert_before(), insert_after(), and replace().""" | """Base test for insert_before(), insert_after(), and replace().""" | ||||
code = parse("{{a}}{{b}}{{c}}{{d}}{{e}}") | code = parse("{{a}}{{b}}{{c}}{{d}}{{e}}") | ||||
@@ -249,6 +264,7 @@ def _test_search(meth, expected): | |||||
meth(code9, code9.get_sections()[0], "{{quz}}") | meth(code9, code9.get_sections()[0], "{{quz}}") | ||||
assert expected[8] == code9 | assert expected[8] == code9 | ||||
def test_insert_before(): | def test_insert_before(): | ||||
"""test Wikicode.insert_before()""" | """test Wikicode.insert_before()""" | ||||
meth = lambda code, *args, **kw: code.insert_before(*args, **kw) | meth = lambda code, *args, **kw: code.insert_before(*args, **kw) | ||||
@@ -265,6 +281,7 @@ def test_insert_before(): | |||||
] | ] | ||||
_test_search(meth, expected) | _test_search(meth, expected) | ||||
def test_insert_after(): | def test_insert_after(): | ||||
"""test Wikicode.insert_after()""" | """test Wikicode.insert_after()""" | ||||
meth = lambda code, *args, **kw: code.insert_after(*args, **kw) | meth = lambda code, *args, **kw: code.insert_after(*args, **kw) | ||||
@@ -281,6 +298,7 @@ def test_insert_after(): | |||||
] | ] | ||||
_test_search(meth, expected) | _test_search(meth, expected) | ||||
def test_replace(): | def test_replace(): | ||||
"""test Wikicode.replace()""" | """test Wikicode.replace()""" | ||||
meth = lambda code, *args, **kw: code.replace(*args, **kw) | meth = lambda code, *args, **kw: code.replace(*args, **kw) | ||||
@@ -297,6 +315,7 @@ def test_replace(): | |||||
] | ] | ||||
_test_search(meth, expected) | _test_search(meth, expected) | ||||
def test_append(): | def test_append(): | ||||
"""test Wikicode.append()""" | """test Wikicode.append()""" | ||||
code = parse("Have a {{template}}") | code = parse("Have a {{template}}") | ||||
@@ -310,6 +329,7 @@ def test_append(): | |||||
with pytest.raises(ValueError): | with pytest.raises(ValueError): | ||||
code.append(slice(0, 1)) | code.append(slice(0, 1)) | ||||
def test_remove(): | def test_remove(): | ||||
"""test Wikicode.remove()""" | """test Wikicode.remove()""" | ||||
meth = lambda code, obj, value, **kw: code.remove(obj, **kw) | meth = lambda code, obj, value, **kw: code.remove(obj, **kw) | ||||
@@ -326,6 +346,7 @@ def test_remove(): | |||||
] | ] | ||||
_test_search(meth, expected) | _test_search(meth, expected) | ||||
def test_matches(): | def test_matches(): | ||||
"""test Wikicode.matches()""" | """test Wikicode.matches()""" | ||||
code1 = parse("Cleanup") | code1 = parse("Cleanup") | ||||
@@ -357,17 +378,32 @@ def test_matches(): | |||||
assert code5.matches("<!-- nothing -->") is True | assert code5.matches("<!-- nothing -->") is True | ||||
assert code5.matches(("a", "b", "")) is True | assert code5.matches(("a", "b", "")) is True | ||||
def test_filter_family(): | def test_filter_family(): | ||||
"""test the Wikicode.i?filter() family of functions""" | """test the Wikicode.i?filter() family of functions""" | ||||
def genlist(gen): | def genlist(gen): | ||||
assert isinstance(gen, GeneratorType) | assert isinstance(gen, GeneratorType) | ||||
return list(gen) | return list(gen) | ||||
ifilter = lambda code: (lambda *a, **k: genlist(code.ifilter(*a, **k))) | ifilter = lambda code: (lambda *a, **k: genlist(code.ifilter(*a, **k))) | ||||
code = parse("a{{b}}c[[d]]{{{e}}}{{f}}[[g]]") | code = parse("a{{b}}c[[d]]{{{e}}}{{f}}[[g]]") | ||||
for func in (code.filter, ifilter(code)): | for func in (code.filter, ifilter(code)): | ||||
assert ["a", "{{b}}", "b", "c", "[[d]]", "d", "{{{e}}}", | |||||
"e", "{{f}}", "f", "[[g]]", "g"] == func() | |||||
assert [ | |||||
"a", | |||||
"{{b}}", | |||||
"b", | |||||
"c", | |||||
"[[d]]", | |||||
"d", | |||||
"{{{e}}}", | |||||
"e", | |||||
"{{f}}", | |||||
"f", | |||||
"[[g]]", | |||||
"g", | |||||
] == func() | |||||
assert ["{{{e}}}"] == func(forcetype=Argument) | assert ["{{{e}}}"] == func(forcetype=Argument) | ||||
assert code.get(4) is func(forcetype=Argument)[0] | assert code.get(4) is func(forcetype=Argument)[0] | ||||
assert list("abcdefg") == func(forcetype=Text) | assert list("abcdefg") == func(forcetype=Text) | ||||
@@ -377,7 +413,7 @@ def test_filter_family():
     funcs = [
         lambda name, **kw: getattr(code, "filter_" + name)(**kw),
-        lambda name, **kw: genlist(getattr(code, "ifilter_" + name)(**kw))
+        lambda name, **kw: genlist(getattr(code, "ifilter_" + name)(**kw)),
     ]
     for get_filter in funcs:
         assert ["{{{e}}}"] == get_filter("arguments")
@@ -393,27 +429,35 @@ def test_filter_family():
     code2 = parse("{{a|{{b}}|{{c|d={{f}}{{h}}}}}}")
     for func in (code2.filter, ifilter(code2)):
-        assert ["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}"] \
-            == func(recursive=False, forcetype=Template)
-        assert ["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}", "{{b}}",
-                "{{c|d={{f}}{{h}}}}", "{{f}}", "{{h}}"] \
-            == func(recursive=True, forcetype=Template)
+        assert ["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}"] == func(
+            recursive=False, forcetype=Template
+        )
+        assert [
+            "{{a|{{b}}|{{c|d={{f}}{{h}}}}}}",
+            "{{b}}",
+            "{{c|d={{f}}{{h}}}}",
+            "{{f}}",
+            "{{h}}",
+        ] == func(recursive=True, forcetype=Template)

     code3 = parse("{{foobar}}{{FOO}}{{baz}}{{bz}}{{barfoo}}")
     for func in (code3.filter, ifilter(code3)):
-        assert ["{{foobar}}", "{{barfoo}}"] \
-            == func(False, matches=lambda node: "foo" in node)
-        assert ["{{foobar}}", "{{FOO}}", "{{barfoo}}"] \
-            == func(False, matches=r"foo")
-        assert ["{{foobar}}", "{{FOO}}"] \
-            == func(matches=r"^{{foo.*?}}")
-        assert ["{{foobar}}"] \
-            == func(matches=r"^{{foo.*?}}", flags=re.UNICODE)
+        assert ["{{foobar}}", "{{barfoo}}"] == func(
+            False, matches=lambda node: "foo" in node
+        )
+        assert ["{{foobar}}", "{{FOO}}", "{{barfoo}}"] == func(False, matches=r"foo")
+        assert ["{{foobar}}", "{{FOO}}"] == func(matches=r"^{{foo.*?}}")
+        assert ["{{foobar}}"] == func(matches=r"^{{foo.*?}}", flags=re.UNICODE)
         assert ["{{baz}}", "{{bz}}"] == func(matches=r"^{{b.*?z")
         assert ["{{baz}}"] == func(matches=r"^{{b.+?z}}")

-    exp_rec = ["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}", "{{b}}",
-               "{{c|d={{f}}{{h}}}}", "{{f}}", "{{h}}"]
+    exp_rec = [
+        "{{a|{{b}}|{{c|d={{f}}{{h}}}}}}",
+        "{{b}}",
+        "{{c|d={{f}}{{h}}}}",
+        "{{f}}",
+        "{{h}}",
+    ]
     exp_unrec = ["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}"]
     assert exp_rec == code2.filter_templates()
     assert exp_unrec == code2.filter_templates(recursive=False)
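The matches= option being reflowed here accepts either a regular expression (applied case-insensitively by default) or a predicate taking the node. A brief sketch under the same assumptions:

    import re

    import mwparserfromhell

    code = mwparserfromhell.parse("{{foobar}}{{FOO}}{{baz}}{{bz}}{{barfoo}}")

    # Default flags include re.IGNORECASE, so {{FOO}} matches too:
    assert code.filter_templates(matches=r"^{{foo.*?}}") == ["{{foobar}}", "{{FOO}}"]

    # Overriding flags= drops the case-insensitive default:
    assert code.filter_templates(matches=r"^{{foo.*?}}", flags=re.UNICODE) == ["{{foobar}}"]

    # A callable receives each node; template names have a matches() helper:
    assert code.filter_templates(matches=lambda node: node.name.matches("Foobar")) == ["{{foobar}}"]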
@@ -422,9 +466,9 @@ def test_filter_family():
     assert exp_unrec == code2.filter_templates(False)
     assert ["{{foobar}}"] == code3.filter_templates(
-        matches=lambda node: node.name.matches("Foobar"))
-    assert ["{{baz}}", "{{bz}}"] \
-        == code3.filter_templates(matches=r"^{{b.*?z")
+        matches=lambda node: node.name.matches("Foobar")
+    )
+    assert ["{{baz}}", "{{bz}}"] == code3.filter_templates(matches=r"^{{b.*?z")
     assert [] == code3.filter_tags(matches=r"^{{b.*?z")
     assert [] == code3.filter_tags(matches=r"^{{b.*?z", flags=0)
     with pytest.raises(TypeError):
@@ -440,6 +484,7 @@ def test_filter_family():
     assert ["{{foo}}", "{{foo|{{bar}}}}"] == actual1
     assert ["{{foo}}", "{{foo|{{bar}}}}"] == actual2

+
 def test_get_sections():
     """test Wikicode.get_sections()"""
     page1 = parse("")
@@ -461,44 +506,70 @@ def test_get_sections():
     assert [""] == page1.get_sections()
     assert ["", "==Heading=="] == page2.get_sections()
-    assert ["", "===Heading===\nFoo bar baz\n====Gnidaeh====\n", "====Gnidaeh====\n"] \
-        == page3.get_sections()
-    assert [p4_lead, p4_I, p4_IA, p4_IB, p4_IB1, p4_II,
-            p4_III, p4_IIIA, p4_IIIA1a, p4_IIIA2, p4_IIIA2ai1] \
-        == page4.get_sections()
+    assert [
+        "",
+        "===Heading===\nFoo bar baz\n====Gnidaeh====\n",
+        "====Gnidaeh====\n",
+    ] == page3.get_sections()
+    assert [
+        p4_lead,
+        p4_I,
+        p4_IA,
+        p4_IB,
+        p4_IB1,
+        p4_II,
+        p4_III,
+        p4_IIIA,
+        p4_IIIA1a,
+        p4_IIIA2,
+        p4_IIIA2ai1,
+    ] == page4.get_sections()

     assert ["====Gnidaeh====\n"] == page3.get_sections(levels=[4])
-    assert ["===Heading===\nFoo bar baz\n====Gnidaeh====\n"] \
-        == page3.get_sections(levels=(2, 3))
-    assert ["===Heading===\nFoo bar baz\n"] \
-        == page3.get_sections(levels=(2, 3), flat=True)
+    assert ["===Heading===\nFoo bar baz\n====Gnidaeh====\n"] == page3.get_sections(
+        levels=(2, 3)
+    )
+    assert ["===Heading===\nFoo bar baz\n"] == page3.get_sections(
+        levels=(2, 3), flat=True
+    )
     assert [] == page3.get_sections(levels=[0])
-    assert ["", "====Gnidaeh====\n"] == page3.get_sections(levels=[4], include_lead=True)
-    assert ["===Heading===\nFoo bar baz\n====Gnidaeh====\n",
-            "====Gnidaeh====\n"] == page3.get_sections(include_lead=False)
-    assert ["===Heading===\nFoo bar baz\n", "====Gnidaeh====\n"] \
-        == page3.get_sections(flat=True, include_lead=False)
+    assert ["", "====Gnidaeh====\n"] == page3.get_sections(
+        levels=[4], include_lead=True
+    )
+    assert [
+        "===Heading===\nFoo bar baz\n====Gnidaeh====\n",
+        "====Gnidaeh====\n",
+    ] == page3.get_sections(include_lead=False)
+    assert ["===Heading===\nFoo bar baz\n", "====Gnidaeh====\n"] == page3.get_sections(
+        flat=True, include_lead=False
+    )

     assert [p4_IB1, p4_IIIA2] == page4.get_sections(levels=[4])
     assert [p4_IA, p4_IB, p4_IIIA] == page4.get_sections(levels=[3])
-    assert [p4_IA, "=== Section I.B ===\n",
-            "=== Section III.A ===\nText.\n"] \
-        == page4.get_sections(levels=[3], flat=True)
+    assert [
+        p4_IA,
+        "=== Section I.B ===\n",
+        "=== Section III.A ===\nText.\n",
+    ] == page4.get_sections(levels=[3], flat=True)
     assert ["", ""] == page2.get_sections(include_headings=False)
-    assert ["\nSection I.B.1 body.\n\n•Some content.\n\n",
-            "\nEven more text.\n" + p4_IIIA2ai1] \
-        == page4.get_sections(levels=[4], include_headings=False)
+    assert [
+        "\nSection I.B.1 body.\n\n•Some content.\n\n",
+        "\nEven more text.\n" + p4_IIIA2ai1,
+    ] == page4.get_sections(levels=[4], include_headings=False)

     assert [] == page4.get_sections(matches=r"body")
-    assert [p4_I, p4_IA, p4_IB, p4_IB1] \
-        == page4.get_sections(matches=r"Section\sI[.\s].*?")
-    assert [p4_IA, p4_IIIA, p4_IIIA1a, p4_IIIA2, p4_IIIA2ai1] \
-        == page4.get_sections(matches=r".*?a.*?")
-    assert [p4_IIIA1a, p4_IIIA2ai1] \
-        == page4.get_sections(matches=r".*?a.*?", flags=re.U)
-    assert ["\nMore text.\n", "\nAn invalid section!"] \
-        == page4.get_sections(matches=r".*?a.*?", flags=re.U,
-                              include_headings=False)
+    assert [p4_I, p4_IA, p4_IB, p4_IB1] == page4.get_sections(
+        matches=r"Section\sI[.\s].*?"
+    )
+    assert [p4_IA, p4_IIIA, p4_IIIA1a, p4_IIIA2, p4_IIIA2ai1] == page4.get_sections(
+        matches=r".*?a.*?"
+    )
+    assert [p4_IIIA1a, p4_IIIA2ai1] == page4.get_sections(
+        matches=r".*?a.*?", flags=re.U
+    )
+    assert ["\nMore text.\n", "\nAn invalid section!"] == page4.get_sections(
+        matches=r".*?a.*?", flags=re.U, include_headings=False
+    )

     sections = page2.get_sections(include_headings=False)
     sections[0].append("Lead!\n")
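For orientation, get_sections() itself in miniature (an illustrative sketch with made-up sample text; the option names are the same ones exercised above):

    import mwparserfromhell

    code = mwparserfromhell.parse("Lead.\n== A ==\nText A.\n=== A.1 ===\nText A.1.\n")

    # Sections nest by default: "== A ==" still contains "=== A.1 ===".
    lead, sec_a, sec_a1 = code.get_sections()
    assert str(sec_a1) == "=== A.1 ===\nText A.1.\n"

    # levels= selects heading depths; flat=True cuts off subsections.
    assert code.get_sections(levels=[3]) == ["=== A.1 ===\nText A.1.\n"]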
@@ -512,22 +583,22 @@ def test_get_sections():
     assert "== Foo ==\nBarf {{Haha}}\n" == section
     assert "X\n== Foo ==\nBarf {{Haha}}\n== Baz ==\nBuzz" == page5

+
 def test_strip_code():
     """test Wikicode.strip_code()"""
     # Since individual nodes have test cases for their __strip__ methods,
     # we're only going to do an integration test:
     code = parse("Foo [[bar]]\n\n{{baz|hello}}\n\n[[a|b]] Σ")
-    assert "Foo bar\n\nb Σ" \
-        == code.strip_code(normalize=True, collapse=True)
-    assert "Foo bar\n\n\n\nb Σ" \
-        == code.strip_code(normalize=True, collapse=False)
-    assert "Foo bar\n\nb Σ" \
-        == code.strip_code(normalize=False, collapse=True)
-    assert "Foo bar\n\n\n\nb Σ" \
-        == code.strip_code(normalize=False, collapse=False)
-    assert "Foo bar\n\nhello\n\nb Σ" \
-        == code.strip_code(normalize=True, collapse=True,
-                           keep_template_params=True)
+    assert "Foo bar\n\nb Σ" == code.strip_code(normalize=True, collapse=True)
+    assert "Foo bar\n\n\n\nb Σ" == code.strip_code(normalize=True, collapse=False)
+    assert "Foo bar\n\nb Σ" == code.strip_code(normalize=False, collapse=True)
+    assert "Foo bar\n\n\n\nb Σ" == code.strip_code(
+        normalize=False, collapse=False
+    )
+    assert "Foo bar\n\nhello\n\nb Σ" == code.strip_code(
+        normalize=True, collapse=True, keep_template_params=True
+    )

+
 def test_get_tree():
     """test Wikicode.get_tree()"""
@@ -535,6 +606,8 @@ def test_get_tree():
     # methods, and the docstring covers all possibilities for the output of
     # __showtree__, we'll test it only:
     code = parse("Lorem ipsum {{foo|bar|{{baz}}|spam=eggs}}")
-    expected = "Lorem ipsum \n{{\n\t foo\n\t| 1\n\t= bar\n\t| 2\n\t= " + \
-               "{{\n\t\t\tbaz\n\t }}\n\t| spam\n\t= eggs\n}}"
+    expected = (
+        "Lorem ipsum \n{{\n\t foo\n\t| 1\n\t= bar\n\t| 2\n\t= "
+        + "{{\n\t\t\tbaz\n\t }}\n\t| spam\n\t= eggs\n}}"
+    )
     assert expected.expandtabs(4) == code.get_tree()
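And the two methods from the last hunks, under the same assumptions (values mirror the strip_code() expectations above):

    import mwparserfromhell

    code = mwparserfromhell.parse("Foo [[bar]]\n\n{{baz|hello}}")

    # strip_code() approximates rendered plain text; templates drop out
    # unless keep_template_params=True keeps their parameter values.
    assert code.strip_code() == "Foo bar"
    assert code.strip_code(keep_template_params=True) == "Foo bar\n\nhello"

    # get_tree() pretty-prints the parse tree, mainly for debugging.
    print(code.get_tree())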
@@ -27,6 +27,7 @@ import pytest
 from mwparserfromhell.nodes import Text, Wikilink
 from .conftest import assert_wikicode_equal, wrap, wraptext

+
 def test_str():
     """test Wikilink.__str__()"""
     node = Wikilink(wraptext("foobar"))
@@ -34,6 +35,7 @@ def test_str():
     node2 = Wikilink(wraptext("foo"), wraptext("bar"))
     assert "[[foo|bar]]" == str(node2)

+
 def test_children():
     """test Wikilink.__children__()"""
     node1 = Wikilink(wraptext("foobar"))
@@ -48,6 +50,7 @@ def test_children():
     with pytest.raises(StopIteration):
         next(gen2)

+
 def test_strip():
     """test Wikilink.__strip__()"""
     node = Wikilink(wraptext("foobar"))
@@ -55,6 +58,7 @@ def test_strip():
     assert "foobar" == node.__strip__()
     assert "bar" == node2.__strip__()

+
 def test_showtree():
     """test Wikilink.__showtree__()"""
     output = []
@@ -66,10 +70,19 @@ def test_showtree():
     node1.__showtree__(output.append, get, mark)
     node2.__showtree__(output.append, get, mark)
     valid = [
-        "[[", (getter, node1.title), "]]", "[[", (getter, node2.title),
-        " | ", marker, (getter, node2.text), "]]"]
+        "[[",
+        (getter, node1.title),
+        "]]",
+        "[[",
+        (getter, node2.title),
+        " | ",
+        marker,
+        (getter, node2.text),
+        "]]",
+    ]
     assert valid == output

+
 def test_title():
     """test getter/setter for the title attribute"""
     title = wraptext("foobar")
@@ -82,6 +95,7 @@ def test_title():
     assert_wikicode_equal(wraptext("héhehé"), node1.title)
     assert_wikicode_equal(wraptext("héhehé"), node2.title)

+
 def test_text():
     """test getter/setter for the text attribute"""
     text = wraptext("baz")
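Finally, the Wikilink attributes these hunks touch, sketched end to end (illustrative only; the calls are the same ones the tests use):

    import mwparserfromhell

    code = mwparserfromhell.parse("[[foo|bar]]")
    link = code.filter_wikilinks()[0]

    # title is the link target; text is the optional display label.
    assert link.title == "foo"
    assert link.text == "bar"

    # Assigning to either attribute reparses the value in place.
    link.text = "baz"
    assert str(code) == "[[foo|baz]]"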