@@ -0,0 +1,13 @@ | |||
BasedOnStyle: LLVM | |||
AlignConsecutiveMacros: AcrossEmptyLines | |||
AllowShortFunctionsOnASingleLine: Inline | |||
AlwaysBreakAfterReturnType: TopLevelDefinitions | |||
BinPackArguments: false | |||
BinPackParameters: false | |||
BreakBeforeBraces: Linux | |||
ColumnLimit: 88 | |||
IndentPPDirectives: AfterHash | |||
IndentWidth: 4 | |||
SpaceAfterCStyleCast: true | |||
StatementMacros: | |||
- PyObject_HEAD |
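
This new style file keeps the C sources visually consistent with the Black-formatted Python: ColumnLimit 88 mirrors Black's default line length, with four-space indents. Listing PyObject_HEAD under StatementMacros tells clang-format to treat that macro as a complete statement, so it is not folded onto the struct member that follows it. A minimal sketch of applying the style by hand, assuming clang-format is installed and run from the repository root (the pre-commit hook added below automates this):

    import glob
    import subprocess

    # Reformat the C tokenizer in place using the .clang-format file above.
    for path in glob.glob("src/mwparserfromhell/parser/ctokenizer/*.[ch]"):
        subprocess.run(["clang-format", "-i", "--style=file", path], check=True)
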
@@ -23,3 +23,25 @@ jobs: | |||
with: | |||
user: __token__ | |||
password: ${{ secrets.pypi_password }} | |||
build_aarch64: | |||
runs-on: ubuntu-latest | |||
steps: | |||
- uses: actions/checkout@v2 | |||
- uses: docker/setup-qemu-action@v1 | |||
name: Set up QEMU | |||
- name: Build manylinux aarch64 Python wheels | |||
uses: RalfG/python-wheels-manylinux-build@v0.3.4-manylinux2014_aarch64 | |||
with: | |||
python-versions: 'cp36-cp36m cp37-cp37m cp38-cp38 cp39-cp39' | |||
pip-wheel-args: '-w ./wheelhouse --no-deps' | |||
- name: Move to dist/ | |||
run: | | |||
mkdir -p dist | |||
cp -v wheelhouse/*-manylinux*.whl dist/ | |||
- name: Publish package to PyPI | |||
# Only actually publish if a new tag was pushed | |||
if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') | |||
uses: pypa/gh-action-pypi-publish@37e305e7413032d8422456179fee28fac7d25187 | |||
with: | |||
user: __token__ | |||
password: ${{ secrets.pypi_password }} |
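
The new job cross-builds on a stock x86-64 runner: docker/setup-qemu-action registers binfmt handlers so the manylinux2014_aarch64 container can execute under emulation, and the *-manylinux*.whl glob copies only the auditwheel-repaired wheels into dist/, skipping intermediate linux_aarch64 artifacts. A quick smoke test one might run on real ARM64 hardware (hypothetical; not part of the workflow):

    import platform
    from mwparserfromhell.parser import use_c

    print(platform.machine())  # expect "aarch64" on an ARM64 host
    assert use_c, "C tokenizer missing: the wheel fell back to pure Python"
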
@@ -13,4 +13,6 @@ dist | |||
docs/_build | |||
scripts/*.log | |||
htmlcov/ | |||
compile_commands.json | |||
.idea/ | |||
.pytest_cache/ |
@@ -0,0 +1,9 @@ | |||
repos: | |||
- repo: https://github.com/psf/black | |||
rev: 21.8b0 | |||
hooks: | |||
- id: black | |||
- repo: https://github.com/doublify/pre-commit-clang-format | |||
rev: 62302476d0da01515660132d76902359bed0f782 | |||
hooks: | |||
- id: clang-format |
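
With pre-commit install, both formatters run on staged files before every commit. What the black hook enforces can be previewed through Black's own API; a sketch assuming the pinned black==21.8b0 (its default 88-column limit is what ColumnLimit: 88 mirrors on the C side):

    import black

    messy = "extensions = ['sphinx.ext.autodoc','sphinx.ext.viewcode']"
    print(black.format_str(messy, mode=black.Mode()))
    # -> extensions = ["sphinx.ext.autodoc", "sphinx.ext.viewcode"]
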
@@ -1,3 +1,9 @@ | |||
v0.6.3 (released September 2, 2021): | |||
- Added Linux AArch64 wheels. (#276) | |||
- Fixed C integer conversion, manifesting as parsing errors on big-endian | |||
platforms. (#277) | |||
v0.6.2 (released May 16, 2021): | |||
- Improved parsing of external links. (#232) | |||
@@ -1,6 +1,6 @@ | |||
# This config file is used by appveyor.com to build Windows release binaries | |||
version: 0.6.2-b{build} | |||
version: 0.6.3-b{build} | |||
branches: | |||
only: | |||
@@ -1,6 +1,18 @@ | |||
Changelog | |||
========= | |||
v0.6.3 | |||
------ | |||
`Released September 2, 2021 <https://github.com/earwig/mwparserfromhell/tree/v0.6.3>`_ | |||
(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.6.2...v0.6.3>`__): | |||
- Added Linux AArch64 wheels. | |||
(`#276 <https://github.com/earwig/mwparserfromhell/issues/276>`_) | |||
- Fixed C integer conversion, manifesting as parsing errors on big-endian | |||
platforms. | |||
(`#277 <https://github.com/earwig/mwparserfromhell/issues/277>`_) | |||
v0.6.2 | |||
------ | |||
@@ -16,33 +16,33 @@ import sys, os | |||
# If extensions (or modules to document with autodoc) are in another directory, | |||
# add these directories to sys.path here. If the directory is relative to the | |||
# documentation root, use os.path.abspath to make it absolute, like shown here. | |||
sys.path.insert(0, os.path.abspath('..')) | |||
sys.path.insert(0, os.path.abspath("..")) | |||
import mwparserfromhell | |||
# -- General configuration ----------------------------------------------------- | |||
# If your documentation needs a minimal Sphinx version, state it here. | |||
#needs_sphinx = '1.0' | |||
# needs_sphinx = '1.0' | |||
# Add any Sphinx extension module names here, as strings. They can be extensions | |||
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. | |||
extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx', 'sphinx.ext.viewcode'] | |||
extensions = ["sphinx.ext.autodoc", "sphinx.ext.intersphinx", "sphinx.ext.viewcode"] | |||
# Add any paths that contain templates here, relative to this directory. | |||
templates_path = ['_templates'] | |||
templates_path = ["_templates"] | |||
# The suffix of source filenames. | |||
source_suffix = '.rst' | |||
source_suffix = ".rst" | |||
# The encoding of source files. | |||
#source_encoding = 'utf-8-sig' | |||
# source_encoding = 'utf-8-sig' | |||
# The master toctree document. | |||
master_doc = 'index' | |||
master_doc = "index" | |||
# General information about the project. | |||
project = u'mwparserfromhell' | |||
copyright = u'2012–2021 Ben Kurtovic' | |||
project = "mwparserfromhell" | |||
copyright = "2012–2021 Ben Kurtovic" | |||
# The version info for the project you're documenting, acts as replacement for | |||
# |version| and |release|, also used in various other places throughout the | |||
@@ -55,158 +55,161 @@ release = mwparserfromhell.__version__ | |||
# The language for content autogenerated by Sphinx. Refer to documentation | |||
# for a list of supported languages. | |||
#language = None | |||
# language = None | |||
# There are two options for replacing |today|: either, you set today to some | |||
# non-false value, then it is used: | |||
#today = '' | |||
# today = '' | |||
# Else, today_fmt is used as the format for a strftime call. | |||
#today_fmt = '%B %d, %Y' | |||
# today_fmt = '%B %d, %Y' | |||
# List of patterns, relative to source directory, that match files and | |||
# directories to ignore when looking for source files. | |||
exclude_patterns = ['_build'] | |||
exclude_patterns = ["_build"] | |||
# The reST default role (used for this markup: `text`) to use for all documents. | |||
#default_role = None | |||
# default_role = None | |||
# If true, '()' will be appended to :func: etc. cross-reference text. | |||
#add_function_parentheses = True | |||
# add_function_parentheses = True | |||
# If true, the current module name will be prepended to all description | |||
# unit titles (such as .. function::). | |||
#add_module_names = True | |||
# add_module_names = True | |||
# If true, sectionauthor and moduleauthor directives will be shown in the | |||
# output. They are ignored by default. | |||
#show_authors = False | |||
# show_authors = False | |||
# The name of the Pygments (syntax highlighting) style to use. | |||
pygments_style = 'sphinx' | |||
pygments_style = "sphinx" | |||
# A list of ignored prefixes for module index sorting. | |||
#modindex_common_prefix = [] | |||
# modindex_common_prefix = [] | |||
# -- Options for HTML output --------------------------------------------------- | |||
# The theme to use for HTML and HTML Help pages. See the documentation for | |||
# a list of builtin themes. | |||
html_theme = 'nature' | |||
html_theme = "nature" | |||
# Theme options are theme-specific and customize the look and feel of a theme | |||
# further. For a list of options available for each theme, see the | |||
# documentation. | |||
#html_theme_options = {} | |||
# html_theme_options = {} | |||
# Add any paths that contain custom themes here, relative to this directory. | |||
#html_theme_path = [] | |||
# html_theme_path = [] | |||
# The name for this set of Sphinx documents. If None, it defaults to | |||
# "<project> v<release> documentation". | |||
#html_title = None | |||
# html_title = None | |||
# A shorter title for the navigation bar. Default is the same as html_title. | |||
#html_short_title = None | |||
# html_short_title = None | |||
# The name of an image file (relative to this directory) to place at the top | |||
# of the sidebar. | |||
#html_logo = None | |||
# html_logo = None | |||
# The name of an image file (within the static path) to use as favicon of the | |||
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 | |||
# pixels large. | |||
#html_favicon = None | |||
# html_favicon = None | |||
# Add any paths that contain custom static files (such as style sheets) here, | |||
# relative to this directory. They are copied after the builtin static files, | |||
# so a file named "default.css" will overwrite the builtin "default.css". | |||
html_static_path = ['_static'] | |||
html_static_path = ["_static"] | |||
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, | |||
# using the given strftime format. | |||
#html_last_updated_fmt = '%b %d, %Y' | |||
# html_last_updated_fmt = '%b %d, %Y' | |||
# If true, SmartyPants will be used to convert quotes and dashes to | |||
# typographically correct entities. | |||
#html_use_smartypants = True | |||
# html_use_smartypants = True | |||
# Custom sidebar templates, maps document names to template names. | |||
#html_sidebars = {} | |||
# html_sidebars = {} | |||
# Additional templates that should be rendered to pages, maps page names to | |||
# template names. | |||
#html_additional_pages = {} | |||
# html_additional_pages = {} | |||
# If false, no module index is generated. | |||
#html_domain_indices = True | |||
# html_domain_indices = True | |||
# If false, no index is generated. | |||
#html_use_index = True | |||
# html_use_index = True | |||
# If true, the index is split into individual pages for each letter. | |||
#html_split_index = False | |||
# html_split_index = False | |||
# If true, links to the reST sources are added to the pages. | |||
#html_show_sourcelink = True | |||
# html_show_sourcelink = True | |||
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. | |||
#html_show_sphinx = True | |||
# html_show_sphinx = True | |||
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. | |||
#html_show_copyright = True | |||
# html_show_copyright = True | |||
# If true, an OpenSearch description file will be output, and all pages will | |||
# contain a <link> tag referring to it. The value of this option must be the | |||
# base URL from which the finished HTML is served. | |||
#html_use_opensearch = '' | |||
# html_use_opensearch = '' | |||
# This is the file name suffix for HTML files (e.g. ".xhtml"). | |||
#html_file_suffix = None | |||
# html_file_suffix = None | |||
# Output file base name for HTML help builder. | |||
htmlhelp_basename = 'mwparserfromhelldoc' | |||
htmlhelp_basename = "mwparserfromhelldoc" | |||
# -- Options for LaTeX output -------------------------------------------------- | |||
latex_elements = { | |||
# The paper size ('letterpaper' or 'a4paper'). | |||
#'papersize': 'letterpaper', | |||
# The font size ('10pt', '11pt' or '12pt'). | |||
#'pointsize': '10pt', | |||
# Additional stuff for the LaTeX preamble. | |||
#'preamble': '', | |||
# The paper size ('letterpaper' or 'a4paper'). | |||
#'papersize': 'letterpaper', | |||
# The font size ('10pt', '11pt' or '12pt'). | |||
#'pointsize': '10pt', | |||
# Additional stuff for the LaTeX preamble. | |||
#'preamble': '', | |||
} | |||
# Grouping the document tree into LaTeX files. List of tuples | |||
# (source start file, target name, title, author, documentclass [howto/manual]). | |||
latex_documents = [ | |||
('index', 'mwparserfromhell.tex', u'mwparserfromhell Documentation', | |||
u'Ben Kurtovic', 'manual'), | |||
( | |||
"index", | |||
"mwparserfromhell.tex", | |||
"mwparserfromhell Documentation", | |||
"Ben Kurtovic", | |||
"manual", | |||
) | |||
] | |||
# The name of an image file (relative to this directory) to place at the top of | |||
# the title page. | |||
#latex_logo = None | |||
# latex_logo = None | |||
# For "manual" documents, if this is true, then toplevel headings are parts, | |||
# not chapters. | |||
#latex_use_parts = False | |||
# latex_use_parts = False | |||
# If true, show page references after internal links. | |||
#latex_show_pagerefs = False | |||
# latex_show_pagerefs = False | |||
# If true, show URL addresses after external links. | |||
#latex_show_urls = False | |||
# latex_show_urls = False | |||
# Documents to append as an appendix to all manuals. | |||
#latex_appendices = [] | |||
# latex_appendices = [] | |||
# If false, no module index is generated. | |||
#latex_domain_indices = True | |||
# latex_domain_indices = True | |||
# -- Options for manual page output -------------------------------------------- | |||
@@ -214,12 +217,17 @@ latex_documents = [ | |||
# One entry per manual page. List of tuples | |||
# (source start file, name, description, authors, manual section). | |||
man_pages = [ | |||
('index', 'mwparserfromhell', u'mwparserfromhell Documentation', | |||
[u'Ben Kurtovic'], 1) | |||
( | |||
"index", | |||
"mwparserfromhell", | |||
"mwparserfromhell Documentation", | |||
["Ben Kurtovic"], | |||
1, | |||
) | |||
] | |||
# If true, show URL addresses after external links. | |||
#man_show_urls = False | |||
# man_show_urls = False | |||
# -- Options for Texinfo output ------------------------------------------------ | |||
@@ -228,20 +236,26 @@ man_pages = [ | |||
# (source start file, target name, title, author, | |||
# dir menu entry, description, category) | |||
texinfo_documents = [ | |||
('index', 'mwparserfromhell', u'mwparserfromhell Documentation', | |||
u'Ben Kurtovic', 'mwparserfromhell', 'One line description of project.', | |||
'Miscellaneous'), | |||
( | |||
"index", | |||
"mwparserfromhell", | |||
"mwparserfromhell Documentation", | |||
"Ben Kurtovic", | |||
"mwparserfromhell", | |||
"One line description of project.", | |||
"Miscellaneous", | |||
) | |||
] | |||
# Documents to append as an appendix to all manuals. | |||
#texinfo_appendices = [] | |||
# texinfo_appendices = [] | |||
# If false, no module index is generated. | |||
#texinfo_domain_indices = True | |||
# texinfo_domain_indices = True | |||
# How to display URL addresses: 'footnote', 'no', or 'inline'. | |||
#texinfo_show_urls = 'footnote' | |||
# texinfo_show_urls = 'footnote' | |||
# Example configuration for intersphinx: refer to the Python standard library. | |||
intersphinx_mapping = {'http://docs.python.org/': None} | |||
intersphinx_mapping = {"http://docs.python.org/": None} |
@@ -41,6 +41,7 @@ from mwparserfromhell.parser._tokenizer import CTokenizer | |||
LOOPS = 10000 | |||
class Color: | |||
GRAY = "\x1b[30;1m" | |||
GREEN = "\x1b[92m" | |||
@@ -63,11 +64,11 @@ class MemoryTest: | |||
data = {"name": None, "label": None, "input": None, "output": None} | |||
for line in test.strip().splitlines(): | |||
if line.startswith("name:"): | |||
data["name"] = line[len("name:"):].strip() | |||
data["name"] = line[len("name:") :].strip() | |||
elif line.startswith("label:"): | |||
data["label"] = line[len("label:"):].strip() | |||
data["label"] = line[len("label:") :].strip() | |||
elif line.startswith("input:"): | |||
raw = line[len("input:"):].strip() | |||
raw = line[len("input:") :].strip() | |||
if raw[0] == '"' and raw[-1] == '"': | |||
raw = raw[1:-1] | |||
raw = raw.encode("raw_unicode_escape") | |||
@@ -81,7 +82,7 @@ class MemoryTest: | |||
def load_file(filename): | |||
with open(filename, "rU") as fp: | |||
text = fp.read() | |||
name = path.split(filename)[1][:0-len(extension)] | |||
name = path.split(filename)[1][: 0 - len(extension)] | |||
self._parse_file(name, text) | |||
root = path.split(path.dirname(path.abspath(__file__)))[0] | |||
@@ -119,8 +120,11 @@ class MemoryTest: | |||
tmpl = "{0}[{1:03}/{2}]{3} {4}: " | |||
for i, (name, text) in enumerate(self._tests, 1): | |||
sys.stdout.write(tmpl.format(Color.GRAY, i, len(self._tests), | |||
Color.RESET, name.ljust(width))) | |||
sys.stdout.write( | |||
tmpl.format( | |||
Color.GRAY, i, len(self._tests), Color.RESET, name.ljust(width) | |||
) | |||
) | |||
sys.stdout.flush() | |||
parent, child = Pipe() | |||
p = Process(target=_runner, args=(text, child)) | |||
@@ -156,6 +160,7 @@ def _runner(text, child): | |||
child.send("OK") | |||
child.recv() | |||
if __name__ == "__main__": | |||
setlocale(LC_ALL, "") | |||
MemoryTest().run() |
@@ -52,8 +52,10 @@ elif env_var is not None: | |||
# Remove the command line argument as it isn't understood by setuptools: | |||
sys.argv = [arg for arg in sys.argv | |||
if arg not in ("--without-extension", "--with-extension")] | |||
sys.argv = [ | |||
arg for arg in sys.argv if arg not in ("--without-extension", "--with-extension") | |||
] | |||
def build_ext_patched(self): | |||
try: | |||
@@ -63,33 +65,40 @@ def build_ext_patched(self): | |||
print("Falling back to pure Python mode.") | |||
del self.extensions[:] | |||
if fallback: | |||
build_ext.run, build_ext_original = build_ext_patched, build_ext.run | |||
# Project-specific part begins here: | |||
tokenizer = Extension("mwparserfromhell.parser._tokenizer", | |||
sources=sorted(glob("src/mwparserfromhell/parser/ctokenizer/*.c")), | |||
depends=sorted(glob("src/mwparserfromhell/parser/ctokenizer/*.h"))) | |||
tokenizer = Extension( | |||
"mwparserfromhell.parser._tokenizer", | |||
sources=sorted(glob("src/mwparserfromhell/parser/ctokenizer/*.c")), | |||
depends=sorted(glob("src/mwparserfromhell/parser/ctokenizer/*.h")), | |||
) | |||
setup( | |||
name = "mwparserfromhell", | |||
packages = find_packages("src"), | |||
package_dir = {"": "src"}, | |||
ext_modules = [tokenizer] if use_extension else [], | |||
setup_requires = ["pytest-runner"] if "test" in sys.argv or "pytest" in sys.argv else [], | |||
tests_require = ["pytest"], | |||
version = __version__, | |||
python_requires = ">= 3.5", | |||
author = "Ben Kurtovic", | |||
author_email = "ben.kurtovic@gmail.com", | |||
url = "https://github.com/earwig/mwparserfromhell", | |||
description = "MWParserFromHell is a parser for MediaWiki wikicode.", | |||
long_description = long_docs, | |||
download_url = "https://github.com/earwig/mwparserfromhell/tarball/v{}".format(__version__), | |||
keywords = "earwig mwparserfromhell wikipedia wiki mediawiki wikicode template parsing", | |||
license = "MIT License", | |||
classifiers = [ | |||
name="mwparserfromhell", | |||
packages=find_packages("src"), | |||
package_dir={"": "src"}, | |||
ext_modules=[tokenizer] if use_extension else [], | |||
setup_requires=["pytest-runner"] | |||
if "test" in sys.argv or "pytest" in sys.argv | |||
else [], | |||
tests_require=["pytest"], | |||
version=__version__, | |||
python_requires=">= 3.5", | |||
author="Ben Kurtovic", | |||
author_email="ben.kurtovic@gmail.com", | |||
url="https://github.com/earwig/mwparserfromhell", | |||
description="MWParserFromHell is a parser for MediaWiki wikicode.", | |||
long_description=long_docs, | |||
download_url="https://github.com/earwig/mwparserfromhell/tarball/v{}".format( | |||
__version__ | |||
), | |||
keywords="earwig mwparserfromhell wikipedia wiki mediawiki wikicode template parsing", | |||
license="MIT License", | |||
classifiers=[ | |||
"Development Status :: 4 - Beta", | |||
"Environment :: Console", | |||
"Intended Audience :: Developers", | |||
@@ -101,6 +110,6 @@ setup( | |||
"Programming Language :: Python :: 3.7", | |||
"Programming Language :: Python :: 3.8", | |||
"Programming Language :: Python :: 3.9", | |||
"Topic :: Text Processing :: Markup" | |||
"Topic :: Text Processing :: Markup", | |||
], | |||
) |
@@ -27,10 +27,9 @@ outrageously powerful parser for `MediaWiki <https://www.mediawiki.org>`_ wikicode | |||
__author__ = "Ben Kurtovic" | |||
__copyright__ = "Copyright (C) 2012-2021 Ben Kurtovic" | |||
__license__ = "MIT License" | |||
__version__ = "0.6.2" | |||
__version__ = "0.6.3" | |||
__email__ = "ben.kurtovic@gmail.com" | |||
from . import (definitions, nodes, parser, smart_list, string_mixin, | |||
utils, wikicode) | |||
from . import definitions, nodes, parser, smart_list, string_mixin, utils, wikicode | |||
parse = utils.parse_anything |
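
Only the import formatting changes here; parse remains the package's entry point and the public API is untouched:

    import mwparserfromhell

    code = mwparserfromhell.parse("{{foo|bar}}")
    print(code.filter_templates()[0].name)  # -> foo
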
@@ -26,8 +26,14 @@ When updating this file, please also update the C tokenizer version: | |||
- mwparserfromhell/parser/ctokenizer/definitions.h | |||
""" | |||
__all__ = ["get_html_tag", "is_parsable", "is_visible", "is_single", | |||
"is_single_only", "is_scheme"] | |||
__all__ = [ | |||
"get_html_tag", | |||
"is_parsable", | |||
"is_visible", | |||
"is_single", | |||
"is_single_only", | |||
"is_scheme", | |||
] | |||
URI_SCHEMES = { | |||
# [wikimedia/mediawiki.git]/includes/DefaultSettings.php @ 5c660de5d0 | |||
@@ -92,7 +98,7 @@ INVISIBLE_TAGS = [ | |||
"score", | |||
"section", | |||
"templatedata", | |||
"timeline" | |||
"timeline", | |||
] | |||
# [wikimedia/mediawiki.git]/includes/parser/Sanitizer.php @ 95e17ee645 | |||
@@ -103,29 +109,35 @@ MARKUP_TO_HTML = { | |||
"#": "li", | |||
"*": "li", | |||
";": "dt", | |||
":": "dd" | |||
":": "dd", | |||
} | |||
def get_html_tag(markup): | |||
"""Return the HTML tag associated with the given wiki-markup.""" | |||
return MARKUP_TO_HTML[markup] | |||
def is_parsable(tag): | |||
"""Return if the given *tag*'s contents should be passed to the parser.""" | |||
return tag.lower() not in PARSER_BLACKLIST | |||
def is_visible(tag): | |||
"""Return whether or not the given *tag* contains visible text.""" | |||
return tag.lower() not in INVISIBLE_TAGS | |||
def is_single(tag): | |||
"""Return whether or not the given *tag* can exist without a close tag.""" | |||
return tag.lower() in SINGLE | |||
def is_single_only(tag): | |||
"""Return whether or not the given *tag* must exist without a close tag.""" | |||
return tag.lower() in SINGLE_ONLY | |||
def is_scheme(scheme, slashes=True): | |||
"""Return whether *scheme* is valid for external links.""" | |||
scheme = scheme.lower() | |||
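
These helpers are pure lookups into the tables above, so the reflow changes no behavior. For example:

    from mwparserfromhell.definitions import get_html_tag, is_scheme, is_single_only

    assert get_html_tag(";") == "dt"  # from MARKUP_TO_HTML above
    assert is_single_only("br")       # <br> must not take a closing tag
    assert is_scheme("https")         # listed in URI_SCHEMES
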
@@ -39,5 +39,15 @@ from .tag import Tag | |||
from .template import Template | |||
from .wikilink import Wikilink | |||
__all__ = ["Argument", "Comment", "ExternalLink", "HTMLEntity", "Heading", | |||
"Node", "Tag", "Template", "Text", "Wikilink"] | |||
__all__ = [ | |||
"Argument", | |||
"Comment", | |||
"ExternalLink", | |||
"HTMLEntity", | |||
"Heading", | |||
"Node", | |||
"Tag", | |||
"Template", | |||
"Text", | |||
"Wikilink", | |||
] |
@@ -22,6 +22,7 @@ from ..string_mixin import StringMixIn | |||
__all__ = ["Node"] | |||
class Node(StringMixIn): | |||
"""Represents the base Node type, demonstrating the methods to override. | |||
@@ -35,6 +36,7 @@ class Node(StringMixIn): | |||
:meth:`__showtree__` can be overridden to build a nice tree representation | |||
of the node, if desired, for :meth:`~.Wikicode.get_tree`. | |||
""" | |||
def __str__(self): | |||
raise NotImplementedError() | |||
@@ -24,6 +24,7 @@ from ..utils import parse_anything | |||
__all__ = ["Argument"] | |||
class Argument(Node): | |||
"""Represents a template argument substitution, like ``{{{foo}}}``.""" | |||
@@ -23,6 +23,7 @@ from ._base import Node | |||
__all__ = ["Comment"] | |||
class Comment(Node): | |||
"""Represents a hidden HTML comment, like ``<!-- foobar -->``.""" | |||
@@ -24,6 +24,7 @@ from ..utils import parse_anything | |||
__all__ = ["ExternalLink"] | |||
class ExternalLink(Node): | |||
"""Represents an external link, like ``[http://example.com/ Example]``.""" | |||
@@ -83,6 +84,7 @@ class ExternalLink(Node): | |||
def url(self, value): | |||
# pylint: disable=import-outside-toplevel | |||
from ..parser import contexts | |||
self._url = parse_anything(value, contexts.EXT_LINK_URI) | |||
@title.setter | |||
@@ -24,6 +24,7 @@ from ...utils import parse_anything | |||
__all__ = ["Attribute"] | |||
class Attribute(StringMixIn): | |||
"""Represents an attribute of an HTML tag. | |||
@@ -32,8 +33,15 @@ class Attribute(StringMixIn): | |||
whose value is ``"foo"``. | |||
""" | |||
def __init__(self, name, value=None, quotes='"', pad_first=" ", | |||
pad_before_eq="", pad_after_eq=""): | |||
def __init__( | |||
self, | |||
name, | |||
value=None, | |||
quotes='"', | |||
pad_first=" ", | |||
pad_before_eq="", | |||
pad_after_eq="", | |||
): | |||
super().__init__() | |||
self.name = name | |||
self._quotes = None | |||
@@ -25,6 +25,7 @@ from ...utils import parse_anything | |||
__all__ = ["Parameter"] | |||
class Parameter(StringMixIn): | |||
"""Represents a paramater of a template. | |||
@@ -77,6 +78,5 @@ class Parameter(StringMixIn): | |||
def showkey(self, newval): | |||
newval = bool(newval) | |||
if not newval and not self.can_hide_key(self.name): | |||
raise ValueError("parameter key {!r} cannot be hidden".format( | |||
self.name)) | |||
raise ValueError("parameter key {!r} cannot be hidden".format(self.name)) | |||
self._showkey = newval |
@@ -24,6 +24,7 @@ from ..utils import parse_anything | |||
__all__ = ["Heading"] | |||
class Heading(Node): | |||
"""Represents a section heading in wikicode, like ``== Foo ==``.""" | |||
@@ -24,6 +24,7 @@ from ._base import Node | |||
__all__ = ["HTMLEntity"] | |||
class HTMLEntity(Node): | |||
"""Represents an HTML entity, like `` ``, either named or unnamed.""" | |||
@@ -101,19 +102,23 @@ class HTMLEntity(Node): | |||
except ValueError: | |||
if newval not in htmlentities.entitydefs: | |||
raise ValueError( | |||
"entity value {!r} is not a valid name".format(newval)) from None | |||
"entity value {!r} is not a valid name".format(newval) | |||
) from None | |||
self._named = True | |||
self._hexadecimal = False | |||
else: | |||
if intval < 0 or intval > 0x10FFFF: | |||
raise ValueError( | |||
"entity value 0x{:x} is not in range(0x110000)".format(intval)) from None | |||
"entity value 0x{:x} is not in range(0x110000)".format(intval) | |||
) from None | |||
self._named = False | |||
self._hexadecimal = True | |||
else: | |||
test = int(newval, 16 if self.hexadecimal else 10) | |||
if test < 0 or test > 0x10FFFF: | |||
raise ValueError("entity value {} is not in range(0x110000)".format(test)) | |||
raise ValueError( | |||
"entity value {} is not in range(0x110000)".format(test) | |||
) | |||
self._named = False | |||
self._value = newval | |||
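
The rewrapped error paths keep the same boundary: a codepoint is valid only inside range(0x110000). A sketch using the public HTMLEntity API (argument names as in this module):

    from mwparserfromhell.nodes import HTMLEntity

    ok = HTMLEntity("10FFFF", named=False, hexadecimal=True)  # last valid codepoint
    try:
        ok.value = "110000"  # one past the end; parsed as hex since ok.hexadecimal
    except ValueError as exc:
        print(exc)  # entity value 1114112 is not in range(0x110000)
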
@@ -126,8 +131,10 @@ class HTMLEntity(Node): | |||
try: | |||
int(self.value, 16) | |||
except ValueError as exc: | |||
raise ValueError("current entity value {!r} is not a valid " | |||
"Unicode codepoint".format(self.value)) from exc | |||
raise ValueError( | |||
"current entity value {!r} is not a valid " | |||
"Unicode codepoint".format(self.value) | |||
) from exc | |||
self._named = newval | |||
@hexadecimal.setter | |||
@@ -26,13 +26,24 @@ from ..utils import parse_anything | |||
__all__ = ["Tag"] | |||
class Tag(Node): | |||
"""Represents an HTML-style tag in wikicode, like ``<ref>``.""" | |||
def __init__(self, tag, contents=None, attrs=None, wiki_markup=None, | |||
self_closing=False, invalid=False, implicit=False, padding="", | |||
closing_tag=None, wiki_style_separator=None, | |||
closing_wiki_markup=None): | |||
def __init__( | |||
self, | |||
tag, | |||
contents=None, | |||
attrs=None, | |||
wiki_markup=None, | |||
self_closing=False, | |||
invalid=False, | |||
implicit=False, | |||
padding="", | |||
closing_tag=None, | |||
wiki_style_separator=None, | |||
closing_wiki_markup=None, | |||
): | |||
super().__init__() | |||
self.tag = tag | |||
self.contents = contents | |||
@@ -60,8 +71,14 @@ class Tag(Node): | |||
if self.self_closing: | |||
return self.wiki_markup + attrs + padding + separator | |||
close = self.closing_wiki_markup or "" | |||
return self.wiki_markup + attrs + padding + separator + \ | |||
str(self.contents) + close | |||
return ( | |||
self.wiki_markup | |||
+ attrs | |||
+ padding | |||
+ separator | |||
+ str(self.contents) | |||
+ close | |||
) | |||
result = ("</" if self.invalid else "<") + str(self.tag) | |||
if self.attributes: | |||
@@ -270,8 +287,15 @@ class Tag(Node): | |||
return attr | |||
raise ValueError(name) | |||
def add(self, name, value=None, quotes='"', pad_first=" ", | |||
pad_before_eq="", pad_after_eq=""): | |||
def add( | |||
self, | |||
name, | |||
value=None, | |||
quotes='"', | |||
pad_first=" ", | |||
pad_before_eq="", | |||
pad_after_eq="", | |||
): | |||
"""Add an attribute with the given *name* and *value*. | |||
*name* and *value* can be anything parsable by | |||
@@ -33,6 +33,7 @@ FLAGS = re.DOTALL | re.UNICODE | |||
# Used to allow None as a valid fallback value | |||
_UNSET = object() | |||
class Template(Node): | |||
"""Represents a template in wikicode, like ``{{foo}}``.""" | |||
@@ -153,7 +154,7 @@ class Template(Node): | |||
def _fix_dependendent_params(self, i): | |||
"""Unhide keys if necessary after removing the param at index *i*.""" | |||
if not self.params[i].showkey: | |||
for param in self.params[i + 1:]: | |||
for param in self.params[i + 1 :]: | |||
if not param.showkey: | |||
param.showkey = True | |||
@@ -175,9 +176,10 @@ class Template(Node): | |||
If one exists, we should remove the given one rather than blanking it. | |||
""" | |||
if self.params[i].showkey: | |||
following = self.params[i + 1:] | |||
better_matches = [after.name.strip() == name and not after.showkey | |||
for after in following] | |||
following = self.params[i + 1 :] | |||
better_matches = [ | |||
after.name.strip() == name and not after.showkey for after in following | |||
] | |||
return any(better_matches) | |||
return False | |||
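
These two helpers exist so that removing a positional parameter never silently renumbers the ones after it: a later unnamed parameter either takes over (when it is a better match) or has its key shown. The observable behavior, assuming the public Template API:

    import mwparserfromhell

    t = mwparserfromhell.parse("{{foo|a|b}}").filter_templates()[0]
    t.remove("1")
    print(t)  # -> {{foo|2=b}}; "b" keeps its position instead of becoming param 1
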
@@ -235,8 +237,7 @@ class Template(Node): | |||
def __getitem__(self, name): | |||
return self.get(name) | |||
def add(self, name, value, showkey=None, before=None, | |||
preserve_spacing=True): | |||
def add(self, name, value, showkey=None, before=None, preserve_spacing=True): | |||
"""Add a parameter to the template with a given *name* and *value*. | |||
*name* and *value* can be anything parsable by | |||
@@ -23,6 +23,7 @@ from ._base import Node | |||
__all__ = ["Text"] | |||
class Text(Node): | |||
"""Represents ordinary, unformatted text with no special properties.""" | |||
@@ -24,6 +24,7 @@ from ..utils import parse_anything | |||
__all__ = ["Wikilink"] | |||
class Wikilink(Node): | |||
"""Represents an internal wikilink, like ``[[Foo|Bar]]``.""" | |||
@@ -26,16 +26,20 @@ together into one interface. | |||
from .builder import Builder | |||
from .errors import ParserError | |||
try: | |||
from ._tokenizer import CTokenizer | |||
use_c = True | |||
except ImportError: | |||
from .tokenizer import Tokenizer | |||
CTokenizer = None | |||
use_c = False | |||
__all__ = ["use_c", "Parser", "ParserError"] | |||
class Parser: | |||
"""Represents a parser for wikicode. | |||
@@ -57,6 +61,7 @@ class Parser: | |||
self._tokenizer = CTokenizer() | |||
else: | |||
from .tokenizer import Tokenizer | |||
self._tokenizer = Tokenizer() | |||
self._builder = Builder() | |||
@@ -21,24 +21,34 @@ | |||
from . import tokens | |||
from .errors import ParserError | |||
from ..nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity, Tag, | |||
Template, Text, Wikilink) | |||
from ..nodes import ( | |||
Argument, | |||
Comment, | |||
ExternalLink, | |||
Heading, | |||
HTMLEntity, | |||
Tag, | |||
Template, | |||
Text, | |||
Wikilink, | |||
) | |||
from ..nodes.extras import Attribute, Parameter | |||
from ..smart_list import SmartList | |||
from ..wikicode import Wikicode | |||
__all__ = ["Builder"] | |||
_HANDLERS = { | |||
tokens.Text: lambda self, token: Text(token.text) | |||
} | |||
_HANDLERS = {tokens.Text: lambda self, token: Text(token.text)} | |||
def _add_handler(token_type): | |||
"""Create a decorator that adds a handler function to the lookup table.""" | |||
def decorator(func): | |||
"""Add a handler function to the lookup table.""" | |||
_HANDLERS[token_type] = func | |||
return func | |||
return decorator | |||
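
The decorator only fills the module-level _HANDLERS table; token dispatch elsewhere in Builder reduces to a single dictionary lookup, roughly (a sketch of _handle_token in this module):

    def _handle_token(self, token):
        try:
            return _HANDLERS[type(token)](self, token)
        except KeyError:
            raise ParserError("unexpected token {}".format(type(token).__name__))
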
@@ -84,8 +94,9 @@ class Builder: | |||
key = self._pop() | |||
showkey = True | |||
self._push() | |||
elif isinstance(token, (tokens.TemplateParamSeparator, | |||
tokens.TemplateClose)): | |||
elif isinstance( | |||
token, (tokens.TemplateParamSeparator, tokens.TemplateClose) | |||
): | |||
self._tokens.append(token) | |||
value = self._pop() | |||
if key is None: | |||
@@ -167,10 +178,17 @@ class Builder: | |||
self._push() | |||
elif isinstance(token, tokens.ExternalLinkClose): | |||
if url is not None: | |||
return ExternalLink(url, self._pop(), brackets=brackets, | |||
suppress_space=suppress_space is True) | |||
return ExternalLink(self._pop(), brackets=brackets, | |||
suppress_space=suppress_space is True) | |||
return ExternalLink( | |||
url, | |||
self._pop(), | |||
brackets=brackets, | |||
suppress_space=suppress_space is True, | |||
) | |||
return ExternalLink( | |||
self._pop(), | |||
brackets=brackets, | |||
suppress_space=suppress_space is True, | |||
) | |||
else: | |||
self._write(self._handle_token(token)) | |||
raise ParserError("_handle_external_link() missed a close token") | |||
@@ -184,8 +202,9 @@ class Builder: | |||
if isinstance(token, tokens.HTMLEntityHex): | |||
text = self._tokens.pop() | |||
self._tokens.pop() # Remove HTMLEntityEnd | |||
return HTMLEntity(text.text, named=False, hexadecimal=True, | |||
hex_char=token.char) | |||
return HTMLEntity( | |||
text.text, named=False, hexadecimal=True, hex_char=token.char | |||
) | |||
self._tokens.pop() # Remove HTMLEntityEnd | |||
return HTMLEntity(token.text, named=False, hexadecimal=False) | |||
self._tokens.pop() # Remove HTMLEntityEnd | |||
@@ -227,15 +246,23 @@ class Builder: | |||
self._push() | |||
elif isinstance(token, tokens.TagAttrQuote): | |||
quotes = token.char | |||
elif isinstance(token, (tokens.TagAttrStart, tokens.TagCloseOpen, | |||
tokens.TagCloseSelfclose)): | |||
elif isinstance( | |||
token, | |||
(tokens.TagAttrStart, tokens.TagCloseOpen, tokens.TagCloseSelfclose), | |||
): | |||
self._tokens.append(token) | |||
if name: | |||
value = self._pop() | |||
else: | |||
name, value = self._pop(), None | |||
return Attribute(name, value, quotes, start.pad_first, | |||
start.pad_before_eq, start.pad_after_eq) | |||
return Attribute( | |||
name, | |||
value, | |||
quotes, | |||
start.pad_first, | |||
start.pad_before_eq, | |||
start.pad_after_eq, | |||
) | |||
else: | |||
self._write(self._handle_token(token)) | |||
raise ParserError("_handle_attribute() missed a close token") | |||
@@ -271,9 +298,19 @@ class Builder: | |||
else: | |||
self_closing = False | |||
closing_tag = self._pop() | |||
return Tag(tag, contents, attrs, wiki_markup, self_closing, | |||
invalid, implicit, padding, closing_tag, | |||
wiki_style_separator, closing_wiki_markup) | |||
return Tag( | |||
tag, | |||
contents, | |||
attrs, | |||
wiki_markup, | |||
self_closing, | |||
invalid, | |||
implicit, | |||
padding, | |||
closing_tag, | |||
wiki_style_separator, | |||
closing_wiki_markup, | |||
) | |||
else: | |||
self._write(self._handle_token(token)) | |||
raise ParserError("_handle_tag() missed a close token") | |||
@@ -116,21 +116,21 @@ Aggregate contexts: | |||
# Local contexts: | |||
TEMPLATE_NAME = 1 << 0 | |||
TEMPLATE_PARAM_KEY = 1 << 1 | |||
TEMPLATE_NAME = 1 << 0 | |||
TEMPLATE_PARAM_KEY = 1 << 1 | |||
TEMPLATE_PARAM_VALUE = 1 << 2 | |||
TEMPLATE = TEMPLATE_NAME + TEMPLATE_PARAM_KEY + TEMPLATE_PARAM_VALUE | |||
ARGUMENT_NAME = 1 << 3 | |||
ARGUMENT_NAME = 1 << 3 | |||
ARGUMENT_DEFAULT = 1 << 4 | |||
ARGUMENT = ARGUMENT_NAME + ARGUMENT_DEFAULT | |||
WIKILINK_TITLE = 1 << 5 | |||
WIKILINK_TEXT = 1 << 6 | |||
WIKILINK_TEXT = 1 << 6 | |||
WIKILINK = WIKILINK_TITLE + WIKILINK_TEXT | |||
EXT_LINK_URI = 1 << 7 | |||
EXT_LINK_TITLE = 1 << 8 | |||
EXT_LINK_URI = 1 << 7 | |||
EXT_LINK_TITLE = 1 << 8 | |||
EXT_LINK = EXT_LINK_URI + EXT_LINK_TITLE | |||
HEADING_LEVEL_1 = 1 << 9 | |||
@@ -139,42 +139,61 @@ HEADING_LEVEL_3 = 1 << 11 | |||
HEADING_LEVEL_4 = 1 << 12 | |||
HEADING_LEVEL_5 = 1 << 13 | |||
HEADING_LEVEL_6 = 1 << 14 | |||
HEADING = (HEADING_LEVEL_1 + HEADING_LEVEL_2 + HEADING_LEVEL_3 + | |||
HEADING_LEVEL_4 + HEADING_LEVEL_5 + HEADING_LEVEL_6) | |||
TAG_OPEN = 1 << 15 | |||
TAG_ATTR = 1 << 16 | |||
TAG_BODY = 1 << 17 | |||
HEADING = ( | |||
HEADING_LEVEL_1 | |||
+ HEADING_LEVEL_2 | |||
+ HEADING_LEVEL_3 | |||
+ HEADING_LEVEL_4 | |||
+ HEADING_LEVEL_5 | |||
+ HEADING_LEVEL_6 | |||
) | |||
TAG_OPEN = 1 << 15 | |||
TAG_ATTR = 1 << 16 | |||
TAG_BODY = 1 << 17 | |||
TAG_CLOSE = 1 << 18 | |||
TAG = TAG_OPEN + TAG_ATTR + TAG_BODY + TAG_CLOSE | |||
STYLE_ITALICS = 1 << 19 | |||
STYLE_BOLD = 1 << 20 | |||
STYLE_PASS_AGAIN = 1 << 21 | |||
STYLE_SECOND_PASS = 1 << 22 | |||
STYLE_ITALICS = 1 << 19 | |||
STYLE_BOLD = 1 << 20 | |||
STYLE_PASS_AGAIN = 1 << 21 | |||
STYLE_SECOND_PASS = 1 << 22 | |||
STYLE = STYLE_ITALICS + STYLE_BOLD + STYLE_PASS_AGAIN + STYLE_SECOND_PASS | |||
DL_TERM = 1 << 23 | |||
HAS_TEXT = 1 << 24 | |||
FAIL_ON_TEXT = 1 << 25 | |||
FAIL_NEXT = 1 << 26 | |||
HAS_TEXT = 1 << 24 | |||
FAIL_ON_TEXT = 1 << 25 | |||
FAIL_NEXT = 1 << 26 | |||
FAIL_ON_LBRACE = 1 << 27 | |||
FAIL_ON_RBRACE = 1 << 28 | |||
FAIL_ON_EQUALS = 1 << 29 | |||
HAS_TEMPLATE = 1 << 30 | |||
SAFETY_CHECK = (HAS_TEXT + FAIL_ON_TEXT + FAIL_NEXT + FAIL_ON_LBRACE + | |||
FAIL_ON_RBRACE + FAIL_ON_EQUALS + HAS_TEMPLATE) | |||
TABLE_OPEN = 1 << 31 | |||
TABLE_CELL_OPEN = 1 << 32 | |||
HAS_TEMPLATE = 1 << 30 | |||
SAFETY_CHECK = ( | |||
HAS_TEXT | |||
+ FAIL_ON_TEXT | |||
+ FAIL_NEXT | |||
+ FAIL_ON_LBRACE | |||
+ FAIL_ON_RBRACE | |||
+ FAIL_ON_EQUALS | |||
+ HAS_TEMPLATE | |||
) | |||
TABLE_OPEN = 1 << 31 | |||
TABLE_CELL_OPEN = 1 << 32 | |||
TABLE_CELL_STYLE = 1 << 33 | |||
TABLE_ROW_OPEN = 1 << 34 | |||
TABLE_TD_LINE = 1 << 35 | |||
TABLE_TH_LINE = 1 << 36 | |||
TABLE_ROW_OPEN = 1 << 34 | |||
TABLE_TD_LINE = 1 << 35 | |||
TABLE_TH_LINE = 1 << 36 | |||
TABLE_CELL_LINE_CONTEXTS = TABLE_TD_LINE + TABLE_TH_LINE + TABLE_CELL_STYLE | |||
TABLE = (TABLE_OPEN + TABLE_CELL_OPEN + TABLE_CELL_STYLE + TABLE_ROW_OPEN + | |||
TABLE_TD_LINE + TABLE_TH_LINE) | |||
TABLE = ( | |||
TABLE_OPEN | |||
+ TABLE_CELL_OPEN | |||
+ TABLE_CELL_STYLE | |||
+ TABLE_ROW_OPEN | |||
+ TABLE_TD_LINE | |||
+ TABLE_TH_LINE | |||
) | |||
HTML_ENTITY = 1 << 37 | |||
@@ -184,14 +203,20 @@ GL_HEADING = 1 << 0 | |||
# Aggregate contexts: | |||
FAIL = (TEMPLATE + ARGUMENT + WIKILINK + EXT_LINK_TITLE + HEADING + TAG + | |||
STYLE + TABLE) | |||
UNSAFE = (TEMPLATE_NAME + WIKILINK_TITLE + EXT_LINK_TITLE + | |||
TEMPLATE_PARAM_KEY + ARGUMENT_NAME + TAG_CLOSE) | |||
FAIL = TEMPLATE + ARGUMENT + WIKILINK + EXT_LINK_TITLE + HEADING + TAG + STYLE + TABLE | |||
UNSAFE = ( | |||
TEMPLATE_NAME | |||
+ WIKILINK_TITLE | |||
+ EXT_LINK_TITLE | |||
+ TEMPLATE_PARAM_KEY | |||
+ ARGUMENT_NAME | |||
+ TAG_CLOSE | |||
) | |||
DOUBLE = TEMPLATE_PARAM_KEY + TAG_CLOSE + TABLE_ROW_OPEN | |||
NO_WIKILINKS = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK_TITLE + EXT_LINK_URI | |||
NO_EXT_LINKS = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK_TITLE + EXT_LINK | |||
def describe(context): | |||
"""Return a string describing the given context value, for debugging.""" | |||
flags = [] | |||
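
Each context is a distinct bit, so the aggregates built with + are identical to bitwise OR and membership tests reduce to a single &; the realignment above changes no values. A sketch, using names from this module:

    from mwparserfromhell.parser import contexts as c

    ctx = c.TEMPLATE_NAME | c.HAS_TEXT
    assert ctx & c.TEMPLATE      # inside some part of a template
    assert not ctx & c.WIKILINK
    assert c.TEMPLATE == c.TEMPLATE_NAME | c.TEMPLATE_PARAM_KEY | c.TEMPLATE_PARAM_VALUE
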
@@ -1,6 +1,6 @@ | |||
/* | |||
* avl_tree.h - intrusive, nonrecursive AVL tree data structure (self-balancing | |||
* binary search tree), header file | |||
* binary search tree), header file | |||
* | |||
* Written in 2014-2016 by Eric Biggers <ebiggers3@gmail.com> | |||
* Slight changes for compatibility by Ben Kurtovic <ben.kurtovic@gmail.com> | |||
@@ -24,60 +24,60 @@ | |||
#include <stddef.h> | |||
#if !defined(_MSC_VER) || (_MSC_VER >= 1600) | |||
#include <stdint.h> | |||
# include <stdint.h> | |||
#endif | |||
#ifdef __GNUC__ | |||
# define AVL_INLINE inline __attribute__((always_inline)) | |||
# define AVL_INLINE inline __attribute__((always_inline)) | |||
#elif defined(_MSC_VER) && (_MSC_VER < 1900) | |||
# define AVL_INLINE __inline | |||
# define AVL_INLINE __inline | |||
#else | |||
# define AVL_INLINE inline | |||
# define AVL_INLINE inline | |||
#endif | |||
/* Node in an AVL tree. Embed this in some other data structure. */ | |||
struct avl_tree_node { | |||
/* Pointer to left child or NULL */ | |||
struct avl_tree_node *left; | |||
/* Pointer to left child or NULL */ | |||
struct avl_tree_node *left; | |||
/* Pointer to right child or NULL */ | |||
struct avl_tree_node *right; | |||
/* Pointer to right child or NULL */ | |||
struct avl_tree_node *right; | |||
/* Pointer to parent combined with the balance factor. This saves 4 or | |||
* 8 bytes of memory depending on the CPU architecture. | |||
* | |||
* Low 2 bits: One greater than the balance factor of this subtree, | |||
* which is equal to height(right) - height(left). The mapping is: | |||
* | |||
* 00 => -1 | |||
* 01 => 0 | |||
* 10 => +1 | |||
* 11 => undefined | |||
* | |||
* The rest of the bits are the pointer to the parent node. It must be | |||
* 4-byte aligned, and it will be NULL if this is the root node and | |||
* therefore has no parent. */ | |||
uintptr_t parent_balance; | |||
/* Pointer to parent combined with the balance factor. This saves 4 or | |||
* 8 bytes of memory depending on the CPU architecture. | |||
* | |||
* Low 2 bits: One greater than the balance factor of this subtree, | |||
* which is equal to height(right) - height(left). The mapping is: | |||
* | |||
* 00 => -1 | |||
* 01 => 0 | |||
* 10 => +1 | |||
* 11 => undefined | |||
* | |||
* The rest of the bits are the pointer to the parent node. It must be | |||
* 4-byte aligned, and it will be NULL if this is the root node and | |||
* therefore has no parent. */ | |||
uintptr_t parent_balance; | |||
}; | |||
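
The packing works because nodes are at least 4-byte aligned, leaving the two low pointer bits free for the balance factor. The same arithmetic in Python terms, as a sketch:

    def pack(parent_addr, balance):     # balance in (-1, 0, +1)
        assert parent_addr % 4 == 0     # alignment frees the two low bits
        return parent_addr | (balance + 1)

    def unpack(parent_balance):
        return parent_balance & ~3, (parent_balance & 3) - 1
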
/* Cast an AVL tree node to the containing data structure. */ | |||
#define avl_tree_entry(entry, type, member) \ | |||
((type*) ((char *)(entry) - offsetof(type, member))) | |||
#define avl_tree_entry(entry, type, member) \ | |||
((type *) ((char *) (entry) -offsetof(type, member))) | |||
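
avl_tree_entry is the classic container_of pattern: subtract the member's offset from the embedded node's address to recover the enclosing struct. (The odd "(entry) -offsetof" spacing is clang-format reading the minus as unary; the expression is unchanged.) The offset it relies on, illustrated with ctypes:

    import ctypes

    class IntWrapper(ctypes.Structure):                # mirrors the header's example
        _fields_ = [("data", ctypes.c_int),
                    ("index_node", ctypes.c_void_p)]   # stand-in for the node

    offset = IntWrapper.index_node.offset  # what offsetof(type, member) yields
    # avl_tree_entry(node, IntWrapper, index_node): node address minus offset
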
/* Returns a pointer to the parent of the specified AVL tree node, or NULL if it | |||
* is already the root of the tree. */ | |||
static AVL_INLINE struct avl_tree_node * | |||
avl_get_parent(const struct avl_tree_node *node) | |||
{ | |||
return (struct avl_tree_node *)(node->parent_balance & ~3); | |||
return (struct avl_tree_node *) (node->parent_balance & ~3); | |||
} | |||
/* Marks the specified AVL tree node as unlinked from any tree. */ | |||
static AVL_INLINE void | |||
avl_tree_node_set_unlinked(struct avl_tree_node *node) | |||
{ | |||
node->parent_balance = (uintptr_t)node; | |||
node->parent_balance = (uintptr_t) node; | |||
} | |||
/* Returns true iff the specified AVL tree node has been marked with | |||
@@ -86,30 +86,29 @@ avl_tree_node_set_unlinked(struct avl_tree_node *node) | |||
static AVL_INLINE int | |||
avl_tree_node_is_unlinked(const struct avl_tree_node *node) | |||
{ | |||
return node->parent_balance == (uintptr_t)node; | |||
return node->parent_balance == (uintptr_t) node; | |||
} | |||
/* (Internal use only) */ | |||
extern void | |||
avl_tree_rebalance_after_insert(struct avl_tree_node **root_ptr, | |||
struct avl_tree_node *inserted); | |||
extern void avl_tree_rebalance_after_insert(struct avl_tree_node **root_ptr, | |||
struct avl_tree_node *inserted); | |||
/* | |||
* Looks up an item in the specified AVL tree. | |||
* | |||
* @root | |||
* Pointer to the root of the AVL tree. (This can be NULL --- that just | |||
* means the tree is empty.) | |||
* Pointer to the root of the AVL tree. (This can be NULL --- that just | |||
* means the tree is empty.) | |||
* | |||
* @cmp_ctx | |||
* First argument to pass to the comparison callback. This generally | |||
* should be a pointer to an object equal to the one being searched for. | |||
* First argument to pass to the comparison callback. This generally | |||
* should be a pointer to an object equal to the one being searched for. | |||
* | |||
* @cmp | |||
* Comparison callback. Must return < 0, 0, or > 0 if the first argument | |||
* is less than, equal to, or greater than the second argument, | |||
* respectively. The first argument will be @cmp_ctx and the second | |||
* argument will be a pointer to the AVL tree node of an item in the tree. | |||
* Comparison callback. Must return < 0, 0, or > 0 if the first argument | |||
* is less than, equal to, or greater than the second argument, | |||
* respectively. The first argument will be @cmp_ctx and the second | |||
* argument will be a pointer to the AVL tree node of an item in the tree. | |||
* | |||
* Returns a pointer to the AVL tree node of the resulting item, or NULL if the | |||
* item was not found. | |||
@@ -117,48 +116,49 @@ avl_tree_rebalance_after_insert(struct avl_tree_node **root_ptr, | |||
* Example: | |||
* | |||
* struct int_wrapper { | |||
* int data; | |||
* struct avl_tree_node index_node; | |||
* int data; | |||
* struct avl_tree_node index_node; | |||
* }; | |||
* | |||
* static int _avl_cmp_int_to_node(const void *intptr, | |||
* const struct avl_tree_node *nodeptr) | |||
* const struct avl_tree_node *nodeptr) | |||
* { | |||
* int n1 = *(const int *)intptr; | |||
* int n2 = avl_tree_entry(nodeptr, struct int_wrapper, index_node)->data; | |||
* if (n1 < n2) | |||
* return -1; | |||
* else if (n1 > n2) | |||
* return 1; | |||
* else | |||
* return 0; | |||
* int n1 = *(const int *)intptr; | |||
* int n2 = avl_tree_entry(nodeptr, struct int_wrapper, index_node)->data; | |||
* if (n1 < n2) | |||
* return -1; | |||
* else if (n1 > n2) | |||
* return 1; | |||
* else | |||
* return 0; | |||
* } | |||
* | |||
* bool contains_int(struct avl_tree_node *root, int n) | |||
* { | |||
* struct avl_tree_node *result; | |||
* struct avl_tree_node *result; | |||
* | |||
* result = avl_tree_lookup(root, &n, _avl_cmp_int_to_node); | |||
* return result ? true : false; | |||
* result = avl_tree_lookup(root, &n, _avl_cmp_int_to_node); | |||
* return result ? true : false; | |||
* } | |||
*/ | |||
static AVL_INLINE struct avl_tree_node * | |||
avl_tree_lookup(const struct avl_tree_node *root, | |||
const void *cmp_ctx, | |||
int (*cmp)(const void *, const struct avl_tree_node *)) | |||
const void *cmp_ctx, | |||
int (*cmp)(const void *, const struct avl_tree_node *)) | |||
{ | |||
const struct avl_tree_node *cur = root; | |||
const struct avl_tree_node *cur = root; | |||
while (cur) { | |||
int res = (*cmp)(cmp_ctx, cur); | |||
if (res < 0) | |||
cur = cur->left; | |||
else if (res > 0) | |||
cur = cur->right; | |||
else | |||
break; | |||
} | |||
return (struct avl_tree_node*)cur; | |||
while (cur) { | |||
int res = (*cmp)(cmp_ctx, cur); | |||
if (res < 0) { | |||
cur = cur->left; | |||
} else if (res > 0) { | |||
cur = cur->right; | |||
} else { | |||
break; | |||
} | |||
} | |||
return (struct avl_tree_node *) cur; | |||
} | |||
/* Same as avl_tree_lookup(), but uses a more specific type for the comparison | |||
@@ -167,44 +167,45 @@ avl_tree_lookup(const struct avl_tree_node *root, | |||
* embedded 'struct avl_tree_node'. */ | |||
static AVL_INLINE struct avl_tree_node * | |||
avl_tree_lookup_node(const struct avl_tree_node *root, | |||
const struct avl_tree_node *node, | |||
int (*cmp)(const struct avl_tree_node *, | |||
const struct avl_tree_node *)) | |||
const struct avl_tree_node *node, | |||
int (*cmp)(const struct avl_tree_node *, | |||
const struct avl_tree_node *)) | |||
{ | |||
const struct avl_tree_node *cur = root; | |||
const struct avl_tree_node *cur = root; | |||
while (cur) { | |||
int res = (*cmp)(node, cur); | |||
if (res < 0) | |||
cur = cur->left; | |||
else if (res > 0) | |||
cur = cur->right; | |||
else | |||
break; | |||
} | |||
return (struct avl_tree_node*)cur; | |||
while (cur) { | |||
int res = (*cmp)(node, cur); | |||
if (res < 0) { | |||
cur = cur->left; | |||
} else if (res > 0) { | |||
cur = cur->right; | |||
} else { | |||
break; | |||
} | |||
} | |||
return (struct avl_tree_node *) cur; | |||
} | |||
/* | |||
* Inserts an item into the specified AVL tree. | |||
* | |||
* @root_ptr | |||
* Location of the AVL tree's root pointer. Indirection is needed because | |||
* the root node may change as a result of rotations caused by the | |||
* insertion. Initialize *root_ptr to NULL for an empty tree. | |||
* Location of the AVL tree's root pointer. Indirection is needed because | |||
* the root node may change as a result of rotations caused by the | |||
* insertion. Initialize *root_ptr to NULL for an empty tree. | |||
* | |||
* @item | |||
* Pointer to the `struct avl_tree_node' embedded in the item to insert. | |||
* No members in it need be pre-initialized, although members in the | |||
* containing structure should be pre-initialized so that @cmp can use them | |||
* in comparisons. | |||
* Pointer to the `struct avl_tree_node' embedded in the item to insert. | |||
* No members in it need be pre-initialized, although members in the | |||
* containing structure should be pre-initialized so that @cmp can use them | |||
* in comparisons. | |||
* | |||
* @cmp | |||
* Comparison callback. Must return < 0, 0, or > 0 if the first argument | |||
* is less than, equal to, or greater than the second argument, | |||
* respectively. The first argument will be @item and the second | |||
* argument will be a pointer to an AVL tree node embedded in some | |||
* previously-inserted item to which @item is being compared. | |||
* Comparison callback. Must return < 0, 0, or > 0 if the first argument | |||
* is less than, equal to, or greater than the second argument, | |||
* respectively. The first argument will be @item and the second | |||
* argument will be a pointer to an AVL tree node embedded in some | |||
* previously-inserted item to which @item is being compared. | |||
* | |||
* If no item in the tree is comparatively equal (via @cmp) to @item, inserts | |||
* @item and returns NULL. Otherwise does nothing and returns a pointer to the | |||
@@ -214,150 +215,138 @@ avl_tree_lookup_node(const struct avl_tree_node *root, | |||
* Example: | |||
* | |||
* struct int_wrapper { | |||
* int data; | |||
* struct avl_tree_node index_node; | |||
* int data; | |||
* struct avl_tree_node index_node; | |||
* }; | |||
* | |||
* #define GET_DATA(i) avl_tree_entry((i), struct int_wrapper, index_node)->data | |||
* | |||
* static int _avl_cmp_ints(const struct avl_tree_node *node1, | |||
* const struct avl_tree_node *node2) | |||
* const struct avl_tree_node *node2) | |||
* { | |||
* int n1 = GET_DATA(node1); | |||
* int n2 = GET_DATA(node2); | |||
* if (n1 < n2) | |||
* return -1; | |||
* else if (n1 > n2) | |||
* return 1; | |||
* else | |||
* return 0; | |||
* int n1 = GET_DATA(node1); | |||
* int n2 = GET_DATA(node2); | |||
* if (n1 < n2) | |||
* return -1; | |||
* else if (n1 > n2) | |||
* return 1; | |||
* else | |||
* return 0; | |||
* } | |||
* | |||
* bool insert_int(struct avl_tree_node **root_ptr, int data) | |||
* { | |||
* struct int_wrapper *i = malloc(sizeof(struct int_wrapper)); | |||
* i->data = data; | |||
* if (avl_tree_insert(root_ptr, &i->index_node, _avl_cmp_ints)) { | |||
* // Duplicate. | |||
* free(i); | |||
* return false; | |||
* } | |||
* return true; | |||
* struct int_wrapper *i = malloc(sizeof(struct int_wrapper)); | |||
* i->data = data; | |||
* if (avl_tree_insert(root_ptr, &i->index_node, _avl_cmp_ints)) { | |||
* // Duplicate. | |||
* free(i); | |||
* return false; | |||
* } | |||
* return true; | |||
* } | |||
*/ | |||
static AVL_INLINE struct avl_tree_node * | |||
avl_tree_insert(struct avl_tree_node **root_ptr, | |||
struct avl_tree_node *item, | |||
int (*cmp)(const struct avl_tree_node *, | |||
const struct avl_tree_node *)) | |||
struct avl_tree_node *item, | |||
int (*cmp)(const struct avl_tree_node *, const struct avl_tree_node *)) | |||
{ | |||
struct avl_tree_node **cur_ptr = root_ptr, *cur = NULL; | |||
int res; | |||
struct avl_tree_node **cur_ptr = root_ptr, *cur = NULL; | |||
int res; | |||
while (*cur_ptr) { | |||
cur = *cur_ptr; | |||
res = (*cmp)(item, cur); | |||
if (res < 0) | |||
cur_ptr = &cur->left; | |||
else if (res > 0) | |||
cur_ptr = &cur->right; | |||
else | |||
return cur; | |||
} | |||
*cur_ptr = item; | |||
item->parent_balance = (uintptr_t)cur | 1; | |||
avl_tree_rebalance_after_insert(root_ptr, item); | |||
return NULL; | |||
while (*cur_ptr) { | |||
cur = *cur_ptr; | |||
res = (*cmp)(item, cur); | |||
if (res < 0) { | |||
cur_ptr = &cur->left; | |||
} else if (res > 0) { | |||
cur_ptr = &cur->right; | |||
} else { | |||
return cur; | |||
} | |||
} | |||
*cur_ptr = item; | |||
item->parent_balance = (uintptr_t) cur | 1; | |||
avl_tree_rebalance_after_insert(root_ptr, item); | |||
return NULL; | |||
} | |||
/* Removes an item from the specified AVL tree. | |||
* See implementation for details. */ | |||
extern void | |||
avl_tree_remove(struct avl_tree_node **root_ptr, struct avl_tree_node *node); | |||
extern void avl_tree_remove(struct avl_tree_node **root_ptr, | |||
struct avl_tree_node *node); | |||
/* Nonrecursive AVL tree traversal functions */ | |||
extern struct avl_tree_node * | |||
avl_tree_first_in_order(const struct avl_tree_node *root); | |||
extern struct avl_tree_node *avl_tree_first_in_order(const struct avl_tree_node *root); | |||
extern struct avl_tree_node * | |||
avl_tree_last_in_order(const struct avl_tree_node *root); | |||
extern struct avl_tree_node *avl_tree_last_in_order(const struct avl_tree_node *root); | |||
extern struct avl_tree_node * | |||
avl_tree_next_in_order(const struct avl_tree_node *node); | |||
extern struct avl_tree_node *avl_tree_next_in_order(const struct avl_tree_node *node); | |||
extern struct avl_tree_node * | |||
avl_tree_prev_in_order(const struct avl_tree_node *node); | |||
extern struct avl_tree_node *avl_tree_prev_in_order(const struct avl_tree_node *node); | |||
extern struct avl_tree_node * | |||
avl_tree_first_in_postorder(const struct avl_tree_node *root); | |||
extern struct avl_tree_node * | |||
avl_tree_next_in_postorder(const struct avl_tree_node *prev, | |||
const struct avl_tree_node *prev_parent); | |||
const struct avl_tree_node *prev_parent); | |||
/* | |||
* Iterate through the nodes in an AVL tree in sorted order. | |||
* You may not modify the tree during the iteration. | |||
* | |||
* @child_struct | |||
* Variable that will receive a pointer to each struct inserted into the | |||
* tree. | |||
* Variable that will receive a pointer to each struct inserted into the | |||
* tree. | |||
* @root | |||
* Root of the AVL tree. | |||
* Root of the AVL tree. | |||
* @struct_name | |||
* Type of *child_struct. | |||
* Type of *child_struct. | |||
* @struct_member | |||
* Member of @struct_name type that is the AVL tree node. | |||
* Member of @struct_name type that is the AVL tree node. | |||
* | |||
* Example: | |||
* | |||
* struct int_wrapper { | |||
* int data; | |||
* struct avl_tree_node index_node; | |||
* int data; | |||
* struct avl_tree_node index_node; | |||
* }; | |||
* | |||
* void print_ints(struct avl_tree_node *root) | |||
* { | |||
* struct int_wrapper *i; | |||
* struct int_wrapper *i; | |||
* | |||
* avl_tree_for_each_in_order(i, root, struct int_wrapper, index_node) | |||
* printf("%d\n", i->data); | |||
* avl_tree_for_each_in_order(i, root, struct int_wrapper, index_node) | |||
* printf("%d\n", i->data); | |||
* } | |||
*/ | |||
#define avl_tree_for_each_in_order(child_struct, root, \ | |||
struct_name, struct_member) \ | |||
for (struct avl_tree_node *_cur = \ | |||
avl_tree_first_in_order(root); \ | |||
_cur && ((child_struct) = \ | |||
avl_tree_entry(_cur, struct_name, \ | |||
struct_member), 1); \ | |||
_cur = avl_tree_next_in_order(_cur)) | |||
#define avl_tree_for_each_in_order(child_struct, root, struct_name, struct_member) \ | |||
for (struct avl_tree_node *_cur = avl_tree_first_in_order(root); \ | |||
_cur && \ | |||
((child_struct) = avl_tree_entry(_cur, struct_name, struct_member), 1); \ | |||
_cur = avl_tree_next_in_order(_cur)) | |||
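
The reflowed macro keeps the same comma-operator trick: ((child_struct) = avl_tree_entry(...), 1) assigns and then evaluates to 1, so the loop condition smuggles the assignment in while still terminating on a NULL cursor. The loop's shape in Python terms, with all callables as hypothetical stand-ins for the C functions:

    def in_order(root, first_in_order, next_in_order, entry):
        """Loop shape of the C iteration macro (sketch only)."""
        cur = first_in_order(root)
        while cur:
            child_struct = entry(cur)  # the (expr, 1) comma trick does this in C
            yield child_struct         # the macro runs the caller's loop body here
            cur = next_in_order(cur)
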
/* | |||
* Like avl_tree_for_each_in_order(), but uses the reverse order. | |||
*/ | |||
#define avl_tree_for_each_in_reverse_order(child_struct, root, \ | |||
struct_name, struct_member) \ | |||
for (struct avl_tree_node *_cur = \ | |||
avl_tree_last_in_order(root); \ | |||
_cur && ((child_struct) = \ | |||
avl_tree_entry(_cur, struct_name, \ | |||
struct_member), 1); \ | |||
_cur = avl_tree_prev_in_order(_cur)) | |||
#define avl_tree_for_each_in_reverse_order( \ | |||
child_struct, root, struct_name, struct_member) \ | |||
for (struct avl_tree_node *_cur = avl_tree_last_in_order(root); \ | |||
_cur && \ | |||
((child_struct) = avl_tree_entry(_cur, struct_name, struct_member), 1); \ | |||
_cur = avl_tree_prev_in_order(_cur)) | |||
/* | |||
* Like avl_tree_for_each_in_order(), but iterates through the nodes in | |||
* postorder, so the current node may be deleted or freed. | |||
*/ | |||
#define avl_tree_for_each_in_postorder(child_struct, root, \ | |||
struct_name, struct_member) \ | |||
for (struct avl_tree_node *_cur = \ | |||
avl_tree_first_in_postorder(root), *_parent; \ | |||
_cur && ((child_struct) = \ | |||
avl_tree_entry(_cur, struct_name, \ | |||
struct_member), 1) \ | |||
&& (_parent = avl_get_parent(_cur), 1); \ | |||
_cur = avl_tree_next_in_postorder(_cur, _parent)) | |||
#define avl_tree_for_each_in_postorder(child_struct, root, struct_name, struct_member) \ | |||
for (struct avl_tree_node *_cur = avl_tree_first_in_postorder(root), *_parent; \ | |||
_cur && \ | |||
((child_struct) = avl_tree_entry(_cur, struct_name, struct_member), 1) && \ | |||
(_parent = avl_get_parent(_cur), 1); \ | |||
_cur = avl_tree_next_in_postorder(_cur, _parent)) | |||
#endif /* _AVL_TREE_H_ */ |
@@ -23,55 +23,56 @@ SOFTWARE. | |||
#pragma once | |||
#ifndef PY_SSIZE_T_CLEAN | |||
#define PY_SSIZE_T_CLEAN // See: https://docs.python.org/3/c-api/arg.html | |||
# define PY_SSIZE_T_CLEAN // See: https://docs.python.org/3/c-api/arg.html | |||
#endif | |||
#include <Python.h> | |||
#include <structmember.h> | |||
#include <bytesobject.h> | |||
#include <structmember.h> | |||
#include "avl_tree.h" | |||
/* Compatibility macros */ | |||
#ifndef uint64_t | |||
#define uint64_t unsigned PY_LONG_LONG | |||
# define uint64_t unsigned PY_LONG_LONG | |||
#endif | |||
#define malloc PyObject_Malloc // XXX: yuck | |||
#define malloc PyObject_Malloc // XXX: yuck | |||
#define realloc PyObject_Realloc | |||
#define free PyObject_Free | |||
/* Unicode support macros */ | |||
#define PyUnicode_FROM_SINGLE(chr) \ | |||
#define PyUnicode_FROM_SINGLE(chr) \ | |||
PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, &(chr), 1) | |||
/* Error handling macros */ | |||
#define BAD_ROUTE self->route_state | |||
#define BAD_ROUTE_CONTEXT self->route_context | |||
#define FAIL_ROUTE(context) { \ | |||
self->route_state = 1; \ | |||
self->route_context = context; \ | |||
} | |||
#define RESET_ROUTE() self->route_state = 0 | |||
#define BAD_ROUTE self->route_state | |||
#define BAD_ROUTE_CONTEXT self->route_context | |||
#define FAIL_ROUTE(context) \ | |||
do { \ | |||
self->route_state = 1; \ | |||
self->route_context = context; \ | |||
} while (0) | |||
#define RESET_ROUTE() self->route_state = 0 | |||
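/*
    Editor's note (illustrative, not part of this diff): the new
    do { ... } while (0) wrapper makes FAIL_ROUTE() expand to a single
    statement, so it composes safely with unbraced if/else:

        if (parse_failed)
            FAIL_ROUTE(context);
        else
            RESET_ROUTE();

    With the old { ... } expansion, the semicolon after FAIL_ROUTE(context)
    would have ended the if statement and orphaned the else.
*/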
/* Shared globals */ | |||
extern char** entitydefs; | |||
extern char **entitydefs; | |||
extern PyObject* NOARGS; | |||
extern PyObject* definitions; | |||
extern PyObject *NOARGS; | |||
extern PyObject *definitions; | |||
/* Structs */ | |||
typedef struct { | |||
Py_ssize_t capacity; | |||
Py_ssize_t length; | |||
PyObject* object; | |||
PyObject *object; | |||
int kind; | |||
void* data; | |||
void *data; | |||
} Textbuffer; | |||
typedef struct { | |||
@@ -80,19 +81,19 @@ typedef struct { | |||
} StackIdent; | |||
struct Stack { | |||
PyObject* stack; | |||
PyObject *stack; | |||
uint64_t context; | |||
Textbuffer* textbuffer; | |||
Textbuffer *textbuffer; | |||
StackIdent ident; | |||
struct Stack* next; | |||
struct Stack *next; | |||
}; | |||
typedef struct Stack Stack; | |||
typedef struct { | |||
PyObject* object; /* base PyUnicodeObject object */ | |||
Py_ssize_t length; /* length of object, in code points */ | |||
int kind; /* object's kind value */ | |||
void* data; /* object's raw unicode buffer */ | |||
PyObject *object; /* base PyUnicodeObject object */ | |||
Py_ssize_t length; /* length of object, in code points */ | |||
int kind; /* object's kind value */ | |||
void *data; /* object's raw unicode buffer */ | |||
} TokenizerInput; | |||
typedef struct avl_tree_node avl_tree; | |||
@@ -104,13 +105,13 @@ typedef struct { | |||
typedef struct { | |||
PyObject_HEAD | |||
TokenizerInput text; /* text to tokenize */ | |||
Stack* topstack; /* topmost stack */ | |||
Py_ssize_t head; /* current position in text */ | |||
int global; /* global context */ | |||
int depth; /* stack recursion depth */ | |||
int route_state; /* whether a BadRoute has been triggered */ | |||
uint64_t route_context; /* context when the last BadRoute was triggered */ | |||
avl_tree* bad_routes; /* stack idents for routes known to fail */ | |||
int skip_style_tags; /* temp fix for the sometimes broken tag parser */ | |||
TokenizerInput text; /* text to tokenize */ | |||
Stack *topstack; /* topmost stack */ | |||
Py_ssize_t head; /* current position in text */ | |||
int global; /* global context */ | |||
int depth; /* stack recursion depth */ | |||
int route_state; /* whether a BadRoute has been triggered */ | |||
uint64_t route_context; /* context when the last BadRoute was triggered */ | |||
avl_tree *bad_routes; /* stack idents for routes known to fail */ | |||
int skip_style_tags; /* temp fix for the sometimes broken tag parser */ | |||
} Tokenizer; |
@@ -89,11 +89,17 @@ SOFTWARE. | |||
/* Aggregate contexts */ | |||
#define AGG_FAIL (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_HEADING | LC_TAG | LC_STYLE | LC_TABLE_OPEN) | |||
#define AGG_UNSAFE (LC_TEMPLATE_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_TITLE | LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME) | |||
#define AGG_DOUBLE (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE | LC_TABLE_ROW_OPEN) | |||
#define AGG_NO_WIKILINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_URI) | |||
#define AGG_NO_EXT_LINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK) | |||
#define AGG_FAIL \ | |||
(LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_HEADING | \ | |||
LC_TAG | LC_STYLE | LC_TABLE_OPEN) | |||
#define AGG_UNSAFE \ | |||
(LC_TEMPLATE_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_TITLE | \ | |||
LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME) | |||
#define AGG_DOUBLE (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE | LC_TABLE_ROW_OPEN) | |||
#define AGG_NO_WIKILINKS \ | |||
(LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_URI) | |||
#define AGG_NO_EXT_LINKS \ | |||
(LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK) | |||
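/*
    Editor's sketch (hypothetical call site, not in this diff): the AGG_*
    values are plain bitmasks over the LC_* context flags, so membership
    tests reduce to a single bitwise AND:

        if (self->topstack->context & AGG_NO_WIKILINKS)
            return Tokenizer_emit_text(self, "[[");
*/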
/* Tag contexts */ | |||
@@ -27,7 +27,8 @@ SOFTWARE. | |||
See the Python version for data sources. | |||
*/ | |||
static const char* URI_SCHEMES[] = { | |||
// clang-format off | |||
static const char *URI_SCHEMES[] = { | |||
"bitcoin", | |||
"ftp", | |||
"ftps", | |||
@@ -55,10 +56,10 @@ static const char* URI_SCHEMES[] = { | |||
"urn", | |||
"worldwind", | |||
"xmpp", | |||
NULL, | |||
NULL, | |||
}; | |||
static const char* URI_SCHEMES_AUTHORITY_OPTIONAL[] = { | |||
static const char *URI_SCHEMES_AUTHORITY_OPTIONAL[] = { | |||
"bitcoin", | |||
"geo", | |||
"magnet", | |||
@@ -73,7 +74,7 @@ static const char* URI_SCHEMES_AUTHORITY_OPTIONAL[] = { | |||
NULL, | |||
}; | |||
static const char* PARSER_BLACKLIST[] = { | |||
static const char *PARSER_BLACKLIST[] = { | |||
"categorytree", | |||
"ce", | |||
"chem", | |||
@@ -93,32 +94,32 @@ static const char* PARSER_BLACKLIST[] = { | |||
"timeline", | |||
NULL, | |||
}; | |||
// clang-format on | |||
static const char* SINGLE[] = { | |||
"br", "wbr", "hr", "meta", "link", "img", "li", "dt", "dd", "th", "td", | |||
"tr", NULL | |||
}; | |||
static const char *SINGLE[] = { | |||
"br", "wbr", "hr", "meta", "link", "img", "li", "dt", "dd", "th", "td", "tr", NULL}; | |||
static const char* SINGLE_ONLY[] = { | |||
"br", "wbr", "hr", "meta", "link", "img", NULL | |||
}; | |||
static const char *SINGLE_ONLY[] = {"br", "wbr", "hr", "meta", "link", "img", NULL}; | |||
/* | |||
Convert a PyUnicodeObject to a lowercase ASCII char* array and store it in | |||
the second argument. The caller must Py_DECREF the return value (which owns
the buffer behind *string) when finished.
If the return value is NULL, the conversion failed and *string is not set. | |||
*/ | |||
static PyObject* unicode_to_lcase_ascii(PyObject *input, const char **string) | |||
static PyObject * | |||
unicode_to_lcase_ascii(PyObject *input, const char **string) | |||
{ | |||
PyObject *lower = PyObject_CallMethod(input, "lower", NULL), *bytes; | |||
if (!lower) | |||
if (!lower) { | |||
return NULL; | |||
} | |||
bytes = PyUnicode_AsASCIIString(lower); | |||
Py_DECREF(lower); | |||
if (!bytes) { | |||
if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) | |||
if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) { | |||
PyErr_Clear(); | |||
} | |||
return NULL; | |||
} | |||
*string = PyBytes_AS_STRING(bytes); | |||
@@ -128,14 +129,16 @@ static PyObject* unicode_to_lcase_ascii(PyObject *input, const char **string) | |||
/* | |||
Return whether a PyUnicodeObject is in a list of lowercase ASCII strings. | |||
*/ | |||
static int unicode_in_string_list(PyObject *input, const char **list) | |||
static int | |||
unicode_in_string_list(PyObject *input, const char **list) | |||
{ | |||
const char *string; | |||
PyObject *temp = unicode_to_lcase_ascii(input, &string); | |||
int retval = 0; | |||
if (!temp) | |||
if (!temp) { | |||
return 0; | |||
} | |||
while (*list) { | |||
if (!strcmp(*(list++), string)) { | |||
@@ -144,7 +147,7 @@ static int unicode_in_string_list(PyObject *input, const char **list) | |||
} | |||
} | |||
end: | |||
end: | |||
Py_DECREF(temp); | |||
return retval; | |||
} | |||
@@ -152,7 +155,8 @@ static int unicode_in_string_list(PyObject *input, const char **list) | |||
/* | |||
Return whether the given tag's contents should be passed to the parser.
*/ | |||
int is_parsable(PyObject *tag) | |||
int | |||
is_parsable(PyObject *tag) | |||
{ | |||
return !unicode_in_string_list(tag, PARSER_BLACKLIST); | |||
} | |||
@@ -160,7 +164,8 @@ int is_parsable(PyObject *tag) | |||
/* | |||
Return whether or not the given tag can exist without a close tag. | |||
*/ | |||
int is_single(PyObject *tag) | |||
int | |||
is_single(PyObject *tag) | |||
{ | |||
return unicode_in_string_list(tag, SINGLE); | |||
} | |||
@@ -168,7 +173,8 @@ int is_single(PyObject *tag) | |||
/* | |||
Return whether or not the given tag must exist without a close tag. | |||
*/ | |||
int is_single_only(PyObject *tag) | |||
int | |||
is_single_only(PyObject *tag) | |||
{ | |||
return unicode_in_string_list(tag, SINGLE_ONLY); | |||
} | |||
@@ -176,10 +182,12 @@ int is_single_only(PyObject *tag) | |||
/* | |||
Return whether the given scheme is valid for external links. | |||
*/ | |||
int is_scheme(PyObject *scheme, int slashes) | |||
int | |||
is_scheme(PyObject *scheme, int slashes) | |||
{ | |||
if (slashes) | |||
if (slashes) { | |||
return unicode_in_string_list(scheme, URI_SCHEMES); | |||
else | |||
} else { | |||
return unicode_in_string_list(scheme, URI_SCHEMES_AUTHORITY_OPTIONAL); | |||
} | |||
} |
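/*
    Editor's sketch (hypothetical caller, not in this diff): is_scheme()
    takes the scheme as a PyUnicodeObject plus a flag for whether "//"
    follows the colon, since schemes such as "mailto" are valid without an
    authority component:

        PyObject *scheme = PyUnicode_FromString("mailto");
        if (scheme) {
            int valid = is_scheme(scheme, 0);  // 0: no slashes follow
            Py_DECREF(scheme);
        }
*/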
@@ -28,12 +28,11 @@ SOFTWARE. | |||
/* Functions */ | |||
int is_parsable(PyObject*); | |||
int is_single(PyObject*); | |||
int is_single_only(PyObject*); | |||
int is_scheme(PyObject*, int); | |||
int is_parsable(PyObject *); | |||
int is_single(PyObject *); | |||
int is_single_only(PyObject *); | |||
int is_scheme(PyObject *, int); | |||
/* Macros */ | |||
#define GET_HTML_TAG(markup) \ | |||
(markup == ':' ? "dd" : markup == ';' ? "dt" : "li") | |||
#define GET_HTML_TAG(markup) (markup == ':' ? "dd" : markup == ';' ? "dt" : "li") |
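/*
    Editor's note (illustrative): GET_HTML_TAG maps a wikicode list marker to
    the HTML tag it produces. ';' and ':' open description-list items, and
    any other marker ('*', '#') yields an ordinary list item:

        GET_HTML_TAG(';')  ->  "dt"
        GET_HTML_TAG(':')  ->  "dd"
        GET_HTML_TAG('*')  ->  "li"
*/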
@@ -26,13 +26,14 @@ SOFTWARE. | |||
/* | |||
Initialize a new TagData object. | |||
*/ | |||
TagData* TagData_new(TokenizerInput* text) | |||
TagData * | |||
TagData_new(TokenizerInput *text) | |||
{ | |||
#define ALLOC_BUFFER(name) \ | |||
name = Textbuffer_new(text); \ | |||
if (!name) { \ | |||
TagData_dealloc(self); \ | |||
return NULL; \ | |||
#define ALLOC_BUFFER(name) \ | |||
name = Textbuffer_new(text); \ | |||
if (!name) { \ | |||
TagData_dealloc(self); \ | |||
return NULL; \ | |||
} | |||
TagData *self = malloc(sizeof(TagData)); | |||
@@ -54,25 +55,30 @@ TagData* TagData_new(TokenizerInput* text) | |||
/* | |||
Deallocate the given TagData object. | |||
*/ | |||
void TagData_dealloc(TagData* self) | |||
void | |||
TagData_dealloc(TagData *self) | |||
{ | |||
if (self->pad_first) | |||
if (self->pad_first) { | |||
Textbuffer_dealloc(self->pad_first); | |||
if (self->pad_before_eq) | |||
} | |||
if (self->pad_before_eq) { | |||
Textbuffer_dealloc(self->pad_before_eq); | |||
if (self->pad_after_eq) | |||
} | |||
if (self->pad_after_eq) { | |||
Textbuffer_dealloc(self->pad_after_eq); | |||
} | |||
free(self); | |||
} | |||
/* | |||
Clear the internal buffers of the given TagData object. | |||
*/ | |||
int TagData_reset_buffers(TagData* self) | |||
int | |||
TagData_reset_buffers(TagData *self) | |||
{ | |||
if (Textbuffer_reset(self->pad_first) || | |||
Textbuffer_reset(self->pad_before_eq) || | |||
Textbuffer_reset(self->pad_after_eq)) | |||
if (Textbuffer_reset(self->pad_first) || Textbuffer_reset(self->pad_before_eq) || | |||
Textbuffer_reset(self->pad_after_eq)) { | |||
return -1; | |||
} | |||
return 0; | |||
} |
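/*
    Editor's sketch (hypothetical usage, not in this diff): one TagData is
    created per tag being parsed, and its pad buffers are reset between
    attributes instead of being reallocated:

        TagData *data = TagData_new(&self->text);
        if (!data)
            return -1;
        ... parse one attribute ...
        if (TagData_reset_buffers(data)) {
            TagData_dealloc(data);
            return -1;
        }
        ... parse the next attribute ...
        TagData_dealloc(data);
*/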
@@ -29,15 +29,15 @@ SOFTWARE. | |||
typedef struct { | |||
uint64_t context; | |||
Textbuffer* pad_first; | |||
Textbuffer* pad_before_eq; | |||
Textbuffer* pad_after_eq; | |||
Textbuffer *pad_first; | |||
Textbuffer *pad_before_eq; | |||
Textbuffer *pad_after_eq; | |||
Py_UCS4 quoter; | |||
Py_ssize_t reset; | |||
} TagData; | |||
/* Functions */ | |||
TagData* TagData_new(TokenizerInput*); | |||
void TagData_dealloc(TagData*); | |||
int TagData_reset_buffers(TagData*); | |||
TagData *TagData_new(TokenizerInput *); | |||
void TagData_dealloc(TagData *); | |||
int TagData_reset_buffers(TagData *); |
@@ -23,20 +23,22 @@ SOFTWARE. | |||
#include "textbuffer.h" | |||
#define INITIAL_CAPACITY 32 | |||
#define RESIZE_FACTOR 2 | |||
#define CONCAT_EXTRA 32 | |||
#define RESIZE_FACTOR 2 | |||
#define CONCAT_EXTRA 32 | |||
/* | |||
Internal allocation function for textbuffers. | |||
*/ | |||
static int internal_alloc(Textbuffer* self, Py_UCS4 maxchar) | |||
static int | |||
internal_alloc(Textbuffer *self, Py_UCS4 maxchar) | |||
{ | |||
self->capacity = INITIAL_CAPACITY; | |||
self->length = 0; | |||
self->object = PyUnicode_New(self->capacity, maxchar); | |||
if (!self->object) | |||
if (!self->object) { | |||
return -1; | |||
} | |||
self->kind = PyUnicode_KIND(self->object); | |||
self->data = PyUnicode_DATA(self->object); | |||
@@ -46,7 +48,8 @@ static int internal_alloc(Textbuffer* self, Py_UCS4 maxchar) | |||
/* | |||
Internal deallocation function for textbuffers. | |||
*/ | |||
static void internal_dealloc(Textbuffer* self) | |||
static void | |||
internal_dealloc(Textbuffer *self) | |||
{ | |||
Py_DECREF(self->object); | |||
} | |||
@@ -54,14 +57,16 @@ static void internal_dealloc(Textbuffer* self) | |||
/* | |||
Internal resize function. | |||
*/ | |||
static int internal_resize(Textbuffer* self, Py_ssize_t new_cap) | |||
static int | |||
internal_resize(Textbuffer *self, Py_ssize_t new_cap) | |||
{ | |||
PyObject *newobj; | |||
void *newdata; | |||
newobj = PyUnicode_New(new_cap, PyUnicode_MAX_CHAR_VALUE(self->object)); | |||
if (!newobj) | |||
if (!newobj) { | |||
return -1; | |||
} | |||
newdata = PyUnicode_DATA(newobj); | |||
memcpy(newdata, self->data, self->length * self->kind); | |||
Py_DECREF(self->object); | |||
@@ -75,22 +80,25 @@ static int internal_resize(Textbuffer* self, Py_ssize_t new_cap) | |||
/* | |||
Create a new textbuffer object. | |||
*/ | |||
Textbuffer* Textbuffer_new(TokenizerInput* text) | |||
Textbuffer * | |||
Textbuffer_new(TokenizerInput *text) | |||
{ | |||
Textbuffer* self = malloc(sizeof(Textbuffer)); | |||
Textbuffer *self = malloc(sizeof(Textbuffer)); | |||
Py_UCS4 maxchar = 0; | |||
maxchar = PyUnicode_MAX_CHAR_VALUE(text->object); | |||
if (!self) | |||
if (!self) { | |||
goto fail_nomem; | |||
if (internal_alloc(self, maxchar) < 0) | |||
} | |||
if (internal_alloc(self, maxchar) < 0) { | |||
goto fail_dealloc; | |||
} | |||
return self; | |||
fail_dealloc: | |||
fail_dealloc: | |||
free(self); | |||
fail_nomem: | |||
fail_nomem: | |||
PyErr_NoMemory(); | |||
return NULL; | |||
} | |||
@@ -98,7 +106,8 @@ Textbuffer* Textbuffer_new(TokenizerInput* text) | |||
/* | |||
Deallocate the given textbuffer. | |||
*/ | |||
void Textbuffer_dealloc(Textbuffer* self) | |||
void | |||
Textbuffer_dealloc(Textbuffer *self) | |||
{ | |||
internal_dealloc(self); | |||
free(self); | |||
@@ -107,26 +116,30 @@ void Textbuffer_dealloc(Textbuffer* self) | |||
/* | |||
Reset a textbuffer to its initial, empty state. | |||
*/ | |||
int Textbuffer_reset(Textbuffer* self) | |||
int | |||
Textbuffer_reset(Textbuffer *self) | |||
{ | |||
Py_UCS4 maxchar = 0; | |||
maxchar = PyUnicode_MAX_CHAR_VALUE(self->object); | |||
internal_dealloc(self); | |||
if (internal_alloc(self, maxchar)) | |||
if (internal_alloc(self, maxchar)) { | |||
return -1; | |||
} | |||
return 0; | |||
} | |||
/* | |||
Write a Unicode codepoint to the given textbuffer. | |||
*/ | |||
int Textbuffer_write(Textbuffer* self, Py_UCS4 code) | |||
int | |||
Textbuffer_write(Textbuffer *self, Py_UCS4 code) | |||
{ | |||
if (self->length >= self->capacity) { | |||
if (internal_resize(self, self->capacity * RESIZE_FACTOR) < 0) | |||
if (internal_resize(self, self->capacity * RESIZE_FACTOR) < 0) { | |||
return -1; | |||
} | |||
} | |||
PyUnicode_WRITE(self->kind, self->data, self->length++, code); | |||
@@ -139,7 +152,8 @@ int Textbuffer_write(Textbuffer* self, Py_UCS4 code) | |||
This function does not check for bounds. | |||
*/ | |||
Py_UCS4 Textbuffer_read(Textbuffer* self, Py_ssize_t index) | |||
Py_UCS4 | |||
Textbuffer_read(Textbuffer *self, Py_ssize_t index) | |||
{ | |||
return PyUnicode_READ(self->kind, self->data, index); | |||
} | |||
@@ -147,7 +161,8 @@ Py_UCS4 Textbuffer_read(Textbuffer* self, Py_ssize_t index) | |||
/* | |||
Return the contents of the textbuffer as a Python Unicode object. | |||
*/ | |||
PyObject* Textbuffer_render(Textbuffer* self) | |||
PyObject * | |||
Textbuffer_render(Textbuffer *self) | |||
{ | |||
return PyUnicode_FromKindAndData(self->kind, self->data, self->length); | |||
} | |||
@@ -155,17 +170,20 @@ PyObject* Textbuffer_render(Textbuffer* self) | |||
/* | |||
Concatenate the 'other' textbuffer onto the end of the given textbuffer. | |||
*/ | |||
int Textbuffer_concat(Textbuffer* self, Textbuffer* other) | |||
int | |||
Textbuffer_concat(Textbuffer *self, Textbuffer *other) | |||
{ | |||
Py_ssize_t newlen = self->length + other->length; | |||
if (newlen > self->capacity) { | |||
if (internal_resize(self, newlen + CONCAT_EXTRA) < 0) | |||
if (internal_resize(self, newlen + CONCAT_EXTRA) < 0) { | |||
return -1; | |||
} | |||
} | |||
assert(self->kind == other->kind); | |||
memcpy(((Py_UCS1*) self->data) + self->kind * self->length, other->data, | |||
memcpy(((Py_UCS1 *) self->data) + self->kind * self->length, | |||
other->data, | |||
other->length * other->kind); | |||
self->length = newlen; | |||
@@ -175,15 +193,16 @@ int Textbuffer_concat(Textbuffer* self, Textbuffer* other) | |||
/* | |||
Reverse the contents of the given textbuffer. | |||
*/ | |||
void Textbuffer_reverse(Textbuffer* self) | |||
void | |||
Textbuffer_reverse(Textbuffer *self) | |||
{ | |||
Py_ssize_t i, end = self->length - 1; | |||
Py_UCS4 tmp; | |||
for (i = 0; i < self->length / 2; i++) { | |||
tmp = PyUnicode_READ(self->kind, self->data, i); | |||
PyUnicode_WRITE(self->kind, self->data, i, | |||
PyUnicode_READ(self->kind, self->data, end - i)); | |||
PyUnicode_WRITE( | |||
self->kind, self->data, i, PyUnicode_READ(self->kind, self->data, end - i)); | |||
PyUnicode_WRITE(self->kind, self->data, end - i, tmp); | |||
} | |||
} |
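/*
    Editor's sketch (hypothetical usage, not in this diff; error handling
    elided): a textbuffer accumulates codepoints and renders them back out
    as a Python str:

        Textbuffer *buf = Textbuffer_new(&self->text);
        Textbuffer_write(buf, 'h');
        Textbuffer_write(buf, 'i');
        PyObject *result = Textbuffer_render(buf);  // a str equal to "hi"
        Textbuffer_dealloc(buf);
*/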
@@ -26,11 +26,11 @@ SOFTWARE. | |||
/* Functions */ | |||
Textbuffer* Textbuffer_new(TokenizerInput*); | |||
void Textbuffer_dealloc(Textbuffer*); | |||
int Textbuffer_reset(Textbuffer*); | |||
int Textbuffer_write(Textbuffer*, Py_UCS4); | |||
Py_UCS4 Textbuffer_read(Textbuffer*, Py_ssize_t); | |||
PyObject* Textbuffer_render(Textbuffer*); | |||
int Textbuffer_concat(Textbuffer*, Textbuffer*); | |||
void Textbuffer_reverse(Textbuffer*); | |||
Textbuffer *Textbuffer_new(TokenizerInput *); | |||
void Textbuffer_dealloc(Textbuffer *); | |||
int Textbuffer_reset(Textbuffer *); | |||
int Textbuffer_write(Textbuffer *, Py_UCS4); | |||
Py_UCS4 Textbuffer_read(Textbuffer *, Py_ssize_t); | |||
PyObject *Textbuffer_render(Textbuffer *); | |||
int Textbuffer_concat(Textbuffer *, Textbuffer *); | |||
void Textbuffer_reverse(Textbuffer *); |
@@ -25,11 +25,12 @@ SOFTWARE. | |||
#include "common.h" | |||
static const Py_UCS4 MARKERS[] = { | |||
'{', '}', '[', ']', '<', '>', '|', '=', '&', '\'', '#', '*', ';', ':', '/', | |||
'-', '!', '\n', '\0'}; | |||
'{', '}', '[', ']', '<', '>', '|', '=', '&', '\'', | |||
'#', '*', ';', ':', '/', '-', '!', '\n', '\0', | |||
}; | |||
#define NUM_MARKERS 19 | |||
/* Functions */ | |||
PyObject* Tokenizer_parse(Tokenizer*, uint64_t, int); | |||
PyObject *Tokenizer_parse(Tokenizer *, uint64_t, int); |
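/*
    Editor's sketch (hypothetical helper, not in this diff): MARKERS holds
    the NUM_MARKERS codepoints that can begin a new token, so the parser's
    hot path can test membership with a short linear scan:

        static int
        is_marker(Py_UCS4 code)
        {
            int i;
            for (i = 0; i < NUM_MARKERS; i++) {
                if (MARKERS[i] == code)
                    return 1;
            }
            return 0;
        }
*/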
@@ -27,9 +27,10 @@ SOFTWARE. | |||
/* | |||
Add a new token stack, context, and textbuffer to the list. | |||
*/ | |||
int Tokenizer_push(Tokenizer* self, uint64_t context) | |||
int | |||
Tokenizer_push(Tokenizer *self, uint64_t context) | |||
{ | |||
Stack* top = malloc(sizeof(Stack)); | |||
Stack *top = malloc(sizeof(Stack)); | |||
if (!top) { | |||
PyErr_NoMemory(); | |||
@@ -38,8 +39,9 @@ int Tokenizer_push(Tokenizer* self, uint64_t context) | |||
top->stack = PyList_New(0); | |||
top->context = context; | |||
top->textbuffer = Textbuffer_new(&self->text); | |||
if (!top->textbuffer) | |||
if (!top->textbuffer) { | |||
return -1; | |||
} | |||
top->ident.head = self->head; | |||
top->ident.context = context; | |||
top->next = self->topstack; | |||
@@ -51,16 +53,19 @@ int Tokenizer_push(Tokenizer* self, uint64_t context) | |||
/* | |||
Push the textbuffer onto the stack as a Text node and clear it. | |||
*/ | |||
int Tokenizer_push_textbuffer(Tokenizer* self) | |||
int | |||
Tokenizer_push_textbuffer(Tokenizer *self) | |||
{ | |||
PyObject *text, *kwargs, *token; | |||
Textbuffer* buffer = self->topstack->textbuffer; | |||
Textbuffer *buffer = self->topstack->textbuffer; | |||
if (buffer->length == 0) | |||
if (buffer->length == 0) { | |||
return 0; | |||
} | |||
text = Textbuffer_render(buffer); | |||
if (!text) | |||
if (!text) { | |||
return -1; | |||
} | |||
kwargs = PyDict_New(); | |||
if (!kwargs) { | |||
Py_DECREF(text); | |||
@@ -70,24 +75,27 @@ int Tokenizer_push_textbuffer(Tokenizer* self) | |||
Py_DECREF(text); | |||
token = PyObject_Call(Text, NOARGS, kwargs); | |||
Py_DECREF(kwargs); | |||
if (!token) | |||
if (!token) { | |||
return -1; | |||
} | |||
if (PyList_Append(self->topstack->stack, token)) { | |||
Py_DECREF(token); | |||
return -1; | |||
} | |||
Py_DECREF(token); | |||
if (Textbuffer_reset(buffer)) | |||
if (Textbuffer_reset(buffer)) { | |||
return -1; | |||
} | |||
return 0; | |||
} | |||
/* | |||
Pop and deallocate the top token stack/context/textbuffer. | |||
*/ | |||
void Tokenizer_delete_top_of_stack(Tokenizer* self) | |||
void | |||
Tokenizer_delete_top_of_stack(Tokenizer *self) | |||
{ | |||
Stack* top = self->topstack; | |||
Stack *top = self->topstack; | |||
Py_DECREF(top->stack); | |||
Textbuffer_dealloc(top->textbuffer); | |||
@@ -99,12 +107,14 @@ void Tokenizer_delete_top_of_stack(Tokenizer* self) | |||
/* | |||
Pop the current stack/context/textbuffer, returning the stack.
*/ | |||
PyObject* Tokenizer_pop(Tokenizer* self) | |||
PyObject * | |||
Tokenizer_pop(Tokenizer *self) | |||
{ | |||
PyObject* stack; | |||
PyObject *stack; | |||
if (Tokenizer_push_textbuffer(self)) | |||
if (Tokenizer_push_textbuffer(self)) { | |||
return NULL; | |||
} | |||
stack = self->topstack->stack; | |||
Py_INCREF(stack); | |||
Tokenizer_delete_top_of_stack(self); | |||
@@ -115,13 +125,15 @@ PyObject* Tokenizer_pop(Tokenizer* self) | |||
Pop the current stack/context/textbuffer, returning the stack. We will also
replace the underlying stack's context with the current stack's. | |||
*/ | |||
PyObject* Tokenizer_pop_keeping_context(Tokenizer* self) | |||
PyObject * | |||
Tokenizer_pop_keeping_context(Tokenizer *self) | |||
{ | |||
PyObject* stack; | |||
PyObject *stack; | |||
uint64_t context; | |||
if (Tokenizer_push_textbuffer(self)) | |||
if (Tokenizer_push_textbuffer(self)) { | |||
return NULL; | |||
} | |||
stack = self->topstack->stack; | |||
Py_INCREF(stack); | |||
context = self->topstack->context; | |||
@@ -133,16 +145,18 @@ PyObject* Tokenizer_pop_keeping_context(Tokenizer* self) | |||
/* | |||
Compare two route_tree_nodes that are in their avl_tree_node forms. | |||
*/ | |||
static int compare_nodes( | |||
const struct avl_tree_node* na, const struct avl_tree_node* nb) | |||
static int | |||
compare_nodes(const struct avl_tree_node *na, const struct avl_tree_node *nb) | |||
{ | |||
route_tree_node *a = avl_tree_entry(na, route_tree_node, node); | |||
route_tree_node *b = avl_tree_entry(nb, route_tree_node, node); | |||
if (a->id.head < b->id.head) | |||
if (a->id.head < b->id.head) { | |||
return -1; | |||
if (a->id.head > b->id.head) | |||
} | |||
if (a->id.head > b->id.head) { | |||
return 1; | |||
} | |||
return (a->id.context > b->id.context) - (a->id.context < b->id.context); | |||
} | |||
@@ -152,13 +166,15 @@ static int compare_nodes( | |||
This will be noticed when calling Tokenizer_check_route with the same head | |||
and context, and the route will be failed immediately. | |||
*/ | |||
void Tokenizer_memoize_bad_route(Tokenizer *self) | |||
void | |||
Tokenizer_memoize_bad_route(Tokenizer *self) | |||
{ | |||
route_tree_node *node = malloc(sizeof(route_tree_node)); | |||
if (node) { | |||
node->id = self->topstack->ident; | |||
if (avl_tree_insert(&self->bad_routes, &node->node, compare_nodes)) | |||
if (avl_tree_insert(&self->bad_routes, &node->node, compare_nodes)) { | |||
free(node); | |||
} | |||
} | |||
} | |||
@@ -168,10 +184,11 @@ void Tokenizer_memoize_bad_route(Tokenizer *self) | |||
ident of the failed stack so future parsing attempts down this route can be | |||
stopped early. | |||
*/ | |||
void* Tokenizer_fail_route(Tokenizer* self) | |||
void * | |||
Tokenizer_fail_route(Tokenizer *self) | |||
{ | |||
uint64_t context = self->topstack->context; | |||
PyObject* stack; | |||
PyObject *stack; | |||
Tokenizer_memoize_bad_route(self); | |||
stack = Tokenizer_pop(self); | |||
@@ -193,10 +210,11 @@ void* Tokenizer_fail_route(Tokenizer* self) | |||
but this would introduce too much overhead in the C tokenizer due to the need
to check for a bad route after every call to Tokenizer_push.) | |||
*/ | |||
int Tokenizer_check_route(Tokenizer* self, uint64_t context) | |||
int | |||
Tokenizer_check_route(Tokenizer *self, uint64_t context) | |||
{ | |||
StackIdent ident = {self->head, context}; | |||
struct avl_tree_node *node = (struct avl_tree_node*) (&ident + 1); | |||
struct avl_tree_node *node = (struct avl_tree_node *) (&ident + 1); | |||
if (avl_tree_lookup_node(self->bad_routes, node, compare_nodes)) { | |||
FAIL_ROUTE(context); | |||
@@ -209,7 +227,8 @@ int Tokenizer_check_route(Tokenizer* self, uint64_t context) | |||
Free the tokenizer's bad route cache tree. Intended to be called by the | |||
main tokenizer function after parsing is finished. | |||
*/ | |||
void Tokenizer_free_bad_route_tree(Tokenizer *self) | |||
void | |||
Tokenizer_free_bad_route_tree(Tokenizer *self) | |||
{ | |||
struct avl_tree_node *cur = avl_tree_first_in_postorder(self->bad_routes); | |||
struct avl_tree_node *parent; | |||
@@ -225,17 +244,20 @@ void Tokenizer_free_bad_route_tree(Tokenizer *self) | |||
/* | |||
Write a token to the current token stack. | |||
*/ | |||
int Tokenizer_emit_token(Tokenizer* self, PyObject* token, int first) | |||
int | |||
Tokenizer_emit_token(Tokenizer *self, PyObject *token, int first) | |||
{ | |||
PyObject* instance; | |||
PyObject *instance; | |||
if (Tokenizer_push_textbuffer(self)) | |||
if (Tokenizer_push_textbuffer(self)) { | |||
return -1; | |||
} | |||
instance = PyObject_CallObject(token, NULL); | |||
if (!instance) | |||
if (!instance) { | |||
return -1; | |||
if (first ? PyList_Insert(self->topstack->stack, 0, instance) : | |||
PyList_Append(self->topstack->stack, instance)) { | |||
} | |||
if (first ? PyList_Insert(self->topstack->stack, 0, instance) | |||
: PyList_Append(self->topstack->stack, instance)) { | |||
Py_DECREF(instance); | |||
return -1; | |||
} | |||
@@ -247,10 +269,13 @@ int Tokenizer_emit_token(Tokenizer* self, PyObject* token, int first) | |||
Write a token to the current token stack, with kwargs. Steals a reference | |||
to kwargs. | |||
*/ | |||
int Tokenizer_emit_token_kwargs(Tokenizer* self, PyObject* token, | |||
PyObject* kwargs, int first) | |||
int | |||
Tokenizer_emit_token_kwargs(Tokenizer *self, | |||
PyObject *token, | |||
PyObject *kwargs, | |||
int first) | |||
{ | |||
PyObject* instance; | |||
PyObject *instance; | |||
if (Tokenizer_push_textbuffer(self)) { | |||
Py_DECREF(kwargs); | |||
@@ -261,8 +286,8 @@ int Tokenizer_emit_token_kwargs(Tokenizer* self, PyObject* token, | |||
Py_DECREF(kwargs); | |||
return -1; | |||
} | |||
if (first ? PyList_Insert(self->topstack->stack, 0, instance): | |||
PyList_Append(self->topstack->stack, instance)) { | |||
if (first ? PyList_Insert(self->topstack->stack, 0, instance) | |||
: PyList_Append(self->topstack->stack, instance)) { | |||
Py_DECREF(instance); | |||
Py_DECREF(kwargs); | |||
return -1; | |||
@@ -275,7 +300,8 @@ int Tokenizer_emit_token_kwargs(Tokenizer* self, PyObject* token, | |||
/* | |||
Write a Unicode codepoint to the current textbuffer. | |||
*/ | |||
int Tokenizer_emit_char(Tokenizer* self, Py_UCS4 code) | |||
int | |||
Tokenizer_emit_char(Tokenizer *self, Py_UCS4 code) | |||
{ | |||
return Textbuffer_write(self->topstack->textbuffer, code); | |||
} | |||
@@ -283,13 +309,15 @@ int Tokenizer_emit_char(Tokenizer* self, Py_UCS4 code) | |||
/* | |||
Write a string of text to the current textbuffer. | |||
*/ | |||
int Tokenizer_emit_text(Tokenizer* self, const char* text) | |||
int | |||
Tokenizer_emit_text(Tokenizer *self, const char *text) | |||
{ | |||
int i = 0; | |||
while (text[i]) { | |||
if (Tokenizer_emit_char(self, text[i])) | |||
if (Tokenizer_emit_char(self, text[i])) { | |||
return -1; | |||
} | |||
i++; | |||
} | |||
return 0; | |||
@@ -299,7 +327,8 @@ int Tokenizer_emit_text(Tokenizer* self, const char* text) | |||
Write the contents of another textbuffer to the current textbuffer, | |||
deallocating it in the process. | |||
*/ | |||
int Tokenizer_emit_textbuffer(Tokenizer* self, Textbuffer* buffer) | |||
int | |||
Tokenizer_emit_textbuffer(Tokenizer *self, Textbuffer *buffer) | |||
{ | |||
int retval = Textbuffer_concat(self->topstack->textbuffer, buffer); | |||
Textbuffer_dealloc(buffer); | |||
@@ -309,55 +338,63 @@ int Tokenizer_emit_textbuffer(Tokenizer* self, Textbuffer* buffer) | |||
/* | |||
Write a series of tokens to the current stack at once. | |||
*/ | |||
int Tokenizer_emit_all(Tokenizer* self, PyObject* tokenlist) | |||
int | |||
Tokenizer_emit_all(Tokenizer *self, PyObject *tokenlist) | |||
{ | |||
int pushed = 0; | |||
PyObject *stack, *token, *left, *right, *text; | |||
Textbuffer* buffer; | |||
Textbuffer *buffer; | |||
Py_ssize_t size; | |||
if (PyList_GET_SIZE(tokenlist) > 0) { | |||
token = PyList_GET_ITEM(tokenlist, 0); | |||
switch (PyObject_IsInstance(token, Text)) { | |||
case 0: | |||
case 0: | |||
break; | |||
case 1: { | |||
pushed = 1; | |||
buffer = self->topstack->textbuffer; | |||
if (buffer->length == 0) { | |||
break; | |||
case 1: { | |||
pushed = 1; | |||
buffer = self->topstack->textbuffer; | |||
if (buffer->length == 0) | |||
break; | |||
left = Textbuffer_render(buffer); | |||
if (!left) | |||
return -1; | |||
right = PyObject_GetAttrString(token, "text"); | |||
if (!right) | |||
return -1; | |||
text = PyUnicode_Concat(left, right); | |||
Py_DECREF(left); | |||
Py_DECREF(right); | |||
if (!text) | |||
return -1; | |||
if (PyObject_SetAttrString(token, "text", text)) { | |||
Py_DECREF(text); | |||
return -1; | |||
} | |||
} | |||
left = Textbuffer_render(buffer); | |||
if (!left) { | |||
return -1; | |||
} | |||
right = PyObject_GetAttrString(token, "text"); | |||
if (!right) { | |||
return -1; | |||
} | |||
text = PyUnicode_Concat(left, right); | |||
Py_DECREF(left); | |||
Py_DECREF(right); | |||
if (!text) { | |||
return -1; | |||
} | |||
if (PyObject_SetAttrString(token, "text", text)) { | |||
Py_DECREF(text); | |||
if (Textbuffer_reset(buffer)) | |||
return -1; | |||
break; | |||
return -1; | |||
} | |||
case -1: | |||
Py_DECREF(text); | |||
if (Textbuffer_reset(buffer)) { | |||
return -1; | |||
} | |||
break; | |||
} | |||
case -1: | |||
return -1; | |||
} | |||
} | |||
if (!pushed) { | |||
if (Tokenizer_push_textbuffer(self)) | |||
if (Tokenizer_push_textbuffer(self)) { | |||
return -1; | |||
} | |||
} | |||
stack = self->topstack->stack; | |||
size = PyList_GET_SIZE(stack); | |||
if (PyList_SetSlice(stack, size, size, tokenlist)) | |||
if (PyList_SetSlice(stack, size, size, tokenlist)) { | |||
return -1; | |||
} | |||
return 0; | |||
} | |||
@@ -365,9 +402,10 @@ int Tokenizer_emit_all(Tokenizer* self, PyObject* tokenlist) | |||
Pop the current stack, write text, and then write the stack. 'text' is a | |||
NULL-terminated array of chars. | |||
*/ | |||
int Tokenizer_emit_text_then_stack(Tokenizer* self, const char* text) | |||
int | |||
Tokenizer_emit_text_then_stack(Tokenizer *self, const char *text) | |||
{ | |||
PyObject* stack = Tokenizer_pop(self); | |||
PyObject *stack = Tokenizer_pop(self); | |||
if (Tokenizer_emit_text(self, text)) { | |||
Py_DECREF(stack); | |||
@@ -389,7 +427,8 @@ int Tokenizer_emit_text_then_stack(Tokenizer* self, const char* text) | |||
/* | |||
Internal function to read the codepoint at the given index from the input. | |||
*/ | |||
static Py_UCS4 read_codepoint(TokenizerInput* text, Py_ssize_t index) | |||
static Py_UCS4 | |||
read_codepoint(TokenizerInput *text, Py_ssize_t index) | |||
{ | |||
return PyUnicode_READ(text->kind, text->data, index); | |||
} | |||
@@ -397,24 +436,28 @@ static Py_UCS4 read_codepoint(TokenizerInput* text, Py_ssize_t index) | |||
/* | |||
Read the value at a relative point in the wikicode, forwards. | |||
*/ | |||
Py_UCS4 Tokenizer_read(Tokenizer* self, Py_ssize_t delta) | |||
Py_UCS4 | |||
Tokenizer_read(Tokenizer *self, Py_ssize_t delta) | |||
{ | |||
Py_ssize_t index = self->head + delta; | |||
if (index >= self->text.length) | |||
if (index >= self->text.length) { | |||
return '\0'; | |||
} | |||
return read_codepoint(&self->text, index); | |||
} | |||
/* | |||
Read the value at a relative point in the wikicode, backwards. | |||
*/ | |||
Py_UCS4 Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta) | |||
Py_UCS4 | |||
Tokenizer_read_backwards(Tokenizer *self, Py_ssize_t delta) | |||
{ | |||
Py_ssize_t index; | |||
if (delta > self->head) | |||
if (delta > self->head) { | |||
return '\0'; | |||
} | |||
index = self->head - delta; | |||
return read_codepoint(&self->text, index); | |||
} |
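/*
    Editor's note (illustrative, not in this diff): both read functions clamp
    out-of-range access to '\0', so lookahead and lookbehind never need an
    explicit bounds check at the call site:

        Py_UCS4 this = Tokenizer_read(self, 0);           // codepoint at head
        Py_UCS4 next = Tokenizer_read(self, 1);           // '\0' once past the end
        Py_UCS4 prev = Tokenizer_read_backwards(self, 1); // '\0' before the start
*/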
@@ -26,41 +26,38 @@ SOFTWARE. | |||
/* Functions */ | |||
int Tokenizer_push(Tokenizer*, uint64_t); | |||
int Tokenizer_push_textbuffer(Tokenizer*); | |||
void Tokenizer_delete_top_of_stack(Tokenizer*); | |||
PyObject* Tokenizer_pop(Tokenizer*); | |||
PyObject* Tokenizer_pop_keeping_context(Tokenizer*); | |||
void Tokenizer_memoize_bad_route(Tokenizer*); | |||
void* Tokenizer_fail_route(Tokenizer*); | |||
int Tokenizer_check_route(Tokenizer*, uint64_t); | |||
void Tokenizer_free_bad_route_tree(Tokenizer*); | |||
int Tokenizer_emit_token(Tokenizer*, PyObject*, int); | |||
int Tokenizer_emit_token_kwargs(Tokenizer*, PyObject*, PyObject*, int); | |||
int Tokenizer_emit_char(Tokenizer*, Py_UCS4); | |||
int Tokenizer_emit_text(Tokenizer*, const char*); | |||
int Tokenizer_emit_textbuffer(Tokenizer*, Textbuffer*); | |||
int Tokenizer_emit_all(Tokenizer*, PyObject*); | |||
int Tokenizer_emit_text_then_stack(Tokenizer*, const char*); | |||
Py_UCS4 Tokenizer_read(Tokenizer*, Py_ssize_t); | |||
Py_UCS4 Tokenizer_read_backwards(Tokenizer*, Py_ssize_t); | |||
int Tokenizer_push(Tokenizer *, uint64_t); | |||
int Tokenizer_push_textbuffer(Tokenizer *); | |||
void Tokenizer_delete_top_of_stack(Tokenizer *); | |||
PyObject *Tokenizer_pop(Tokenizer *); | |||
PyObject *Tokenizer_pop_keeping_context(Tokenizer *); | |||
void Tokenizer_memoize_bad_route(Tokenizer *); | |||
void *Tokenizer_fail_route(Tokenizer *); | |||
int Tokenizer_check_route(Tokenizer *, uint64_t); | |||
void Tokenizer_free_bad_route_tree(Tokenizer *); | |||
int Tokenizer_emit_token(Tokenizer *, PyObject *, int); | |||
int Tokenizer_emit_token_kwargs(Tokenizer *, PyObject *, PyObject *, int); | |||
int Tokenizer_emit_char(Tokenizer *, Py_UCS4); | |||
int Tokenizer_emit_text(Tokenizer *, const char *); | |||
int Tokenizer_emit_textbuffer(Tokenizer *, Textbuffer *); | |||
int Tokenizer_emit_all(Tokenizer *, PyObject *); | |||
int Tokenizer_emit_text_then_stack(Tokenizer *, const char *); | |||
Py_UCS4 Tokenizer_read(Tokenizer *, Py_ssize_t); | |||
Py_UCS4 Tokenizer_read_backwards(Tokenizer *, Py_ssize_t); | |||
/* Macros */ | |||
#define MAX_DEPTH 40 | |||
#define Tokenizer_CAN_RECURSE(self) \ | |||
(self->depth < MAX_DEPTH) | |||
#define Tokenizer_IS_CURRENT_STACK(self, id) \ | |||
(self->topstack->ident.head == (id).head && \ | |||
#define MAX_DEPTH 40 | |||
#define Tokenizer_CAN_RECURSE(self) (self->depth < MAX_DEPTH) | |||
#define Tokenizer_IS_CURRENT_STACK(self, id) \ | |||
(self->topstack->ident.head == (id).head && \ | |||
self->topstack->ident.context == (id).context) | |||
#define Tokenizer_emit(self, token) \ | |||
Tokenizer_emit_token(self, token, 0) | |||
#define Tokenizer_emit_first(self, token) \ | |||
Tokenizer_emit_token(self, token, 1) | |||
#define Tokenizer_emit_kwargs(self, token, kwargs) \ | |||
#define Tokenizer_emit(self, token) Tokenizer_emit_token(self, token, 0) | |||
#define Tokenizer_emit_first(self, token) Tokenizer_emit_token(self, token, 1) | |||
#define Tokenizer_emit_kwargs(self, token, kwargs) \ | |||
Tokenizer_emit_token_kwargs(self, token, kwargs, 0) | |||
#define Tokenizer_emit_first_kwargs(self, token, kwargs) \ | |||
#define Tokenizer_emit_first_kwargs(self, token, kwargs) \ | |||
Tokenizer_emit_token_kwargs(self, token, kwargs, 1) |
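/*
    Editor's sketch (hypothetical call site, not in this diff): the kwargs
    variants steal the reference to the dict, so a typical emission builds
    it, fills it, and hands it off with no matching Py_DECREF on success:

        PyObject *kwargs = PyDict_New();
        if (!kwargs)
            return -1;
        PyDict_SetItemString(kwargs, "text", text);
        if (Tokenizer_emit_kwargs(self, Text, kwargs))  // consumes kwargs
            return -1;
*/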
@@ -30,12 +30,12 @@ SOFTWARE. | |||
int route_state; | |||
uint64_t route_context; | |||
char** entitydefs; | |||
char **entitydefs; | |||
PyObject* NOARGS; | |||
PyObject* definitions; | |||
PyObject *NOARGS; | |||
PyObject *definitions; | |||
static PyObject* ParserError; | |||
static PyObject *ParserError; | |||
/* Forward declarations */ | |||
@@ -44,17 +44,18 @@ static int load_exceptions(void); | |||
/* | |||
Create a new tokenizer object. | |||
*/ | |||
static PyObject* | |||
Tokenizer_new(PyTypeObject* type, PyObject* args, PyObject* kwds) | |||
static PyObject * | |||
Tokenizer_new(PyTypeObject *type, PyObject *args, PyObject *kwds) | |||
{ | |||
Tokenizer* self = (Tokenizer*) type->tp_alloc(type, 0); | |||
return (PyObject*) self; | |||
Tokenizer *self = (Tokenizer *) type->tp_alloc(type, 0); | |||
return (PyObject *) self; | |||
} | |||
/* | |||
Deallocate the given tokenizer's text field. | |||
*/ | |||
static void dealloc_tokenizer_text(TokenizerInput* text) | |||
static void | |||
dealloc_tokenizer_text(TokenizerInput *text) | |||
{ | |||
Py_XDECREF(text->object); | |||
} | |||
@@ -62,7 +63,8 @@ static void dealloc_tokenizer_text(TokenizerInput* text) | |||
/* | |||
Deallocate the given tokenizer object. | |||
*/ | |||
static void Tokenizer_dealloc(Tokenizer* self) | |||
static void | |||
Tokenizer_dealloc(Tokenizer *self) | |||
{ | |||
Stack *this = self->topstack, *next; | |||
dealloc_tokenizer_text(&self->text); | |||
@@ -74,13 +76,14 @@ static void Tokenizer_dealloc(Tokenizer* self) | |||
free(this); | |||
this = next; | |||
} | |||
Py_TYPE(self)->tp_free((PyObject*) self); | |||
Py_TYPE(self)->tp_free((PyObject *) self); | |||
} | |||
/* | |||
Initialize a new tokenizer instance's text field. | |||
*/ | |||
static void init_tokenizer_text(TokenizerInput* text) | |||
static void | |||
init_tokenizer_text(TokenizerInput *text) | |||
{ | |||
text->object = Py_None; | |||
Py_INCREF(Py_None); | |||
@@ -92,12 +95,14 @@ static void init_tokenizer_text(TokenizerInput* text) | |||
/* | |||
Initialize a new tokenizer instance by setting instance attributes. | |||
*/ | |||
static int Tokenizer_init(Tokenizer* self, PyObject* args, PyObject* kwds) | |||
static int | |||
Tokenizer_init(Tokenizer *self, PyObject *args, PyObject *kwds) | |||
{ | |||
static char* kwlist[] = {NULL}; | |||
static char *kwlist[] = {NULL}; | |||
if (!PyArg_ParseTupleAndKeywords(args, kwds, "", kwlist)) | |||
if (!PyArg_ParseTupleAndKeywords(args, kwds, "", kwlist)) { | |||
return -1; | |||
} | |||
init_tokenizer_text(&self->text); | |||
self->topstack = NULL; | |||
self->head = self->global = self->depth = 0; | |||
@@ -110,13 +115,15 @@ static int Tokenizer_init(Tokenizer* self, PyObject* args, PyObject* kwds) | |||
/* | |||
Load input text into the tokenizer. | |||
*/ | |||
static int load_tokenizer_text(TokenizerInput* text, PyObject *input) | |||
static int | |||
load_tokenizer_text(TokenizerInput *text, PyObject *input) | |||
{ | |||
dealloc_tokenizer_text(text); | |||
text->object = input; | |||
if (PyUnicode_READY(input) < 0) | |||
if (PyUnicode_READY(input) < 0) { | |||
return -1; | |||
} | |||
text->kind = PyUnicode_KIND(input); | |||
text->data = PyUnicode_DATA(input); | |||
text->length = PyUnicode_GET_LENGTH(input); | |||
@@ -126,30 +133,34 @@ static int load_tokenizer_text(TokenizerInput* text, PyObject *input) | |||
/* | |||
Build a list of tokens from a string of wikicode and return it. | |||
*/ | |||
static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args) | |||
static PyObject * | |||
Tokenizer_tokenize(Tokenizer *self, PyObject *args) | |||
{ | |||
PyObject *input, *tokens; | |||
uint64_t context = 0; | |||
unsigned long long context = 0; | |||
int skip_style_tags = 0; | |||
if (PyArg_ParseTuple(args, "U|ii", &input, &context, &skip_style_tags)) { | |||
if (PyArg_ParseTuple(args, "U|Kp", &input, &context, &skip_style_tags)) { | |||
Py_INCREF(input); | |||
if (load_tokenizer_text(&self->text, input)) | |||
if (load_tokenizer_text(&self->text, input)) { | |||
return NULL; | |||
} | |||
else { | |||
} | |||
} else { | |||
const char *encoded; | |||
Py_ssize_t size; | |||
/* Failed to parse a Unicode object; try a string instead. */ | |||
PyErr_Clear(); | |||
if (!PyArg_ParseTuple(args, "s#|ii", &encoded, &size, &context, | |||
&skip_style_tags)) | |||
if (!PyArg_ParseTuple( | |||
args, "s#|Kp", &encoded, &size, &context, &skip_style_tags)) { | |||
return NULL; | |||
if (!(input = PyUnicode_FromStringAndSize(encoded, size))) | |||
} | |||
if (!(input = PyUnicode_FromStringAndSize(encoded, size))) { | |||
return NULL; | |||
if (load_tokenizer_text(&self->text, input)) | |||
} | |||
if (load_tokenizer_text(&self->text, input)) { | |||
return NULL; | |||
} | |||
} | |||
self->head = self->global = self->depth = 0; | |||
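/*
    Editor's note (explanatory, not part of this diff): the format-string
    change from "U|ii" to "U|Kp" is the substantive fix here. 'i' wrote a
    C int into the 64-bit context variable, which corrupted the value on
    big-endian builds; 'K' fills the full unsigned long long, and 'p' reads
    skip_style_tags as a proper boolean.
*/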
@@ -162,73 +173,83 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args) | |||
if (!tokens || self->topstack) { | |||
Py_XDECREF(tokens); | |||
if (PyErr_Occurred()) | |||
if (PyErr_Occurred()) { | |||
return NULL; | |||
if (!ParserError && load_exceptions() < 0) | |||
} | |||
if (!ParserError && load_exceptions() < 0) { | |||
return NULL; | |||
} | |||
if (BAD_ROUTE) { | |||
RESET_ROUTE(); | |||
PyErr_SetString(ParserError, "C tokenizer exited with BAD_ROUTE"); | |||
} | |||
else if (self->topstack) | |||
} else if (self->topstack) { | |||
PyErr_SetString(ParserError, | |||
"C tokenizer exited with non-empty token stack"); | |||
else | |||
} else { | |||
PyErr_SetString(ParserError, "C tokenizer exited unexpectedly"); | |||
} | |||
return NULL; | |||
} | |||
return tokens; | |||
} | |||
static int load_entities(void) | |||
static int | |||
load_entities(void) | |||
{ | |||
PyObject *tempmod, *defmap, *deflist; | |||
unsigned numdefs, i; | |||
PyObject *string; | |||
tempmod = PyImport_ImportModule("html.entities"); | |||
if (!tempmod) | |||
if (!tempmod) { | |||
return -1; | |||
} | |||
defmap = PyObject_GetAttrString(tempmod, "entitydefs"); | |||
if (!defmap) | |||
if (!defmap) { | |||
return -1; | |||
} | |||
Py_DECREF(tempmod); | |||
deflist = PyDict_Keys(defmap); | |||
if (!deflist) | |||
if (!deflist) { | |||
return -1; | |||
} | |||
Py_DECREF(defmap); | |||
numdefs = (unsigned) PyList_GET_SIZE(deflist); | |||
entitydefs = calloc(numdefs + 1, sizeof(char*)); | |||
if (!entitydefs) | |||
entitydefs = calloc(numdefs + 1, sizeof(char *)); | |||
if (!entitydefs) { | |||
return -1; | |||
} | |||
for (i = 0; i < numdefs; i++) { | |||
string = PyUnicode_AsASCIIString(PyList_GET_ITEM(deflist, i)); | |||
if (!string) | |||
if (!string) { | |||
return -1; | |||
} | |||
entitydefs[i] = PyBytes_AsString(string); | |||
if (!entitydefs[i]) | |||
if (!entitydefs[i]) { | |||
return -1; | |||
} | |||
} | |||
Py_DECREF(deflist); | |||
return 0; | |||
} | |||
static int load_tokens(void) | |||
static int | |||
load_tokens(void) | |||
{ | |||
PyObject *tempmod, *tokens, | |||
*globals = PyEval_GetGlobals(), | |||
*locals = PyEval_GetLocals(), | |||
*fromlist = PyList_New(1), | |||
*modname = PyUnicode_FromString("tokens"); | |||
PyObject *tempmod, *tokens; | |||
PyObject *globals = PyEval_GetGlobals(), *locals = PyEval_GetLocals(), | |||
*fromlist = PyList_New(1), *modname = PyUnicode_FromString("tokens"); | |||
char *name = "mwparserfromhell.parser"; | |||
if (!fromlist || !modname) | |||
if (!fromlist || !modname) { | |||
return -1; | |||
} | |||
PyList_SET_ITEM(fromlist, 0, modname); | |||
tempmod = PyImport_ImportModuleLevel(name, globals, locals, fromlist, 0); | |||
Py_DECREF(fromlist); | |||
if (!tempmod) | |||
if (!tempmod) { | |||
return -1; | |||
} | |||
tokens = PyObject_GetAttrString(tempmod, "tokens"); | |||
Py_DECREF(tempmod); | |||
load_tokens_from_module(tokens); | |||
@@ -236,43 +257,45 @@ static int load_tokens(void) | |||
return 0; | |||
} | |||
static int load_defs(void) | |||
static int | |||
load_defs(void) | |||
{ | |||
PyObject *tempmod, | |||
*globals = PyEval_GetGlobals(), | |||
*locals = PyEval_GetLocals(), | |||
*fromlist = PyList_New(1), | |||
*modname = PyUnicode_FromString("definitions"); | |||
PyObject *tempmod; | |||
PyObject *globals = PyEval_GetGlobals(), *locals = PyEval_GetLocals(), | |||
*fromlist = PyList_New(1), *modname = PyUnicode_FromString("definitions"); | |||
char *name = "mwparserfromhell"; | |||
if (!fromlist || !modname) | |||
if (!fromlist || !modname) { | |||
return -1; | |||
} | |||
PyList_SET_ITEM(fromlist, 0, modname); | |||
tempmod = PyImport_ImportModuleLevel(name, globals, locals, fromlist, 0); | |||
Py_DECREF(fromlist); | |||
if (!tempmod) | |||
if (!tempmod) { | |||
return -1; | |||
} | |||
definitions = PyObject_GetAttrString(tempmod, "definitions"); | |||
Py_DECREF(tempmod); | |||
return 0; | |||
} | |||
static int load_exceptions(void) | |||
static int | |||
load_exceptions(void) | |||
{ | |||
PyObject *tempmod, *parsermod, | |||
*globals = PyEval_GetGlobals(), | |||
*locals = PyEval_GetLocals(), | |||
*fromlist = PyList_New(1), | |||
*modname = PyUnicode_FromString("parser"); | |||
PyObject *tempmod, *parsermod; | |||
PyObject *globals = PyEval_GetGlobals(), *locals = PyEval_GetLocals(), | |||
*fromlist = PyList_New(1), *modname = PyUnicode_FromString("parser"); | |||
char *name = "mwparserfromhell"; | |||
if (!fromlist || !modname) | |||
if (!fromlist || !modname) { | |||
return -1; | |||
} | |||
PyList_SET_ITEM(fromlist, 0, modname); | |||
tempmod = PyImport_ImportModuleLevel(name, globals, locals, fromlist, 0); | |||
Py_DECREF(fromlist); | |||
if (!tempmod) | |||
if (!tempmod) { | |||
return -1; | |||
} | |||
parsermod = PyObject_GetAttrString(tempmod, "parser"); | |||
Py_DECREF(tempmod); | |||
ParserError = PyObject_GetAttrString(parsermod, "ParserError"); | |||
@@ -280,22 +303,26 @@ static int load_exceptions(void) | |||
return 0; | |||
} | |||
PyMODINIT_FUNC PyInit__tokenizer(void) | |||
PyMODINIT_FUNC | |||
PyInit__tokenizer(void) | |||
{ | |||
PyObject *module; | |||
TokenizerType.tp_new = PyType_GenericNew; | |||
if (PyType_Ready(&TokenizerType) < 0) | |||
if (PyType_Ready(&TokenizerType) < 0) { | |||
return NULL; | |||
} | |||
module = PyModule_Create(&module_def); | |||
if (!module) | |||
if (!module) { | |||
return NULL; | |||
} | |||
Py_INCREF(&TokenizerType); | |||
PyModule_AddObject(module, "CTokenizer", (PyObject*) &TokenizerType); | |||
PyModule_AddObject(module, "CTokenizer", (PyObject *) &TokenizerType); | |||
Py_INCREF(Py_True); | |||
PyDict_SetItemString(TokenizerType.tp_dict, "USES_C", Py_True); | |||
NOARGS = PyTuple_New(0); | |||
if (!NOARGS || load_entities() || load_tokens() || load_defs()) | |||
if (!NOARGS || load_entities() || load_tokens() || load_defs()) { | |||
return NULL; | |||
} | |||
return module; | |||
} |
@@ -27,67 +27,76 @@ SOFTWARE. | |||
/* Functions */ | |||
static PyObject* Tokenizer_new(PyTypeObject*, PyObject*, PyObject*); | |||
static void Tokenizer_dealloc(Tokenizer*); | |||
static int Tokenizer_init(Tokenizer*, PyObject*, PyObject*); | |||
static PyObject* Tokenizer_tokenize(Tokenizer*, PyObject*); | |||
static PyObject *Tokenizer_new(PyTypeObject *, PyObject *, PyObject *); | |||
static void Tokenizer_dealloc(Tokenizer *); | |||
static int Tokenizer_init(Tokenizer *, PyObject *, PyObject *); | |||
static PyObject *Tokenizer_tokenize(Tokenizer *, PyObject *); | |||
/* Structs */ | |||
static PyMethodDef Tokenizer_methods[] = { | |||
{"tokenize", (PyCFunction) Tokenizer_tokenize, METH_VARARGS, | |||
"Build a list of tokens from a string of wikicode and return it."}, | |||
{NULL} | |||
{ | |||
"tokenize", | |||
(PyCFunction) Tokenizer_tokenize, | |||
METH_VARARGS, | |||
"Build a list of tokens from a string of wikicode and return it.", | |||
}, | |||
{NULL}, | |||
}; | |||
static PyMemberDef Tokenizer_members[] = { | |||
{NULL} | |||
{NULL}, | |||
}; | |||
static PyTypeObject TokenizerType = { | |||
PyVarObject_HEAD_INIT(NULL, 0) | |||
"_tokenizer.CTokenizer", /* tp_name */ | |||
sizeof(Tokenizer), /* tp_basicsize */ | |||
0, /* tp_itemsize */ | |||
(destructor) Tokenizer_dealloc, /* tp_dealloc */ | |||
0, /* tp_print */ | |||
0, /* tp_getattr */ | |||
0, /* tp_setattr */ | |||
0, /* tp_compare */ | |||
0, /* tp_repr */ | |||
0, /* tp_as_number */ | |||
0, /* tp_as_sequence */ | |||
0, /* tp_as_mapping */ | |||
0, /* tp_hash */ | |||
0, /* tp_call */ | |||
0, /* tp_str */ | |||
0, /* tp_getattro */ | |||
0, /* tp_setattro */ | |||
0, /* tp_as_buffer */ | |||
Py_TPFLAGS_DEFAULT, /* tp_flags */ | |||
"Creates a list of tokens from a string of wikicode.", /* tp_doc */ | |||
0, /* tp_traverse */ | |||
0, /* tp_clear */ | |||
0, /* tp_richcompare */ | |||
0, /* tp_weaklistoffset */ | |||
0, /* tp_iter */ | |||
0, /* tp_iternext */ | |||
Tokenizer_methods, /* tp_methods */ | |||
Tokenizer_members, /* tp_members */ | |||
0, /* tp_getset */ | |||
0, /* tp_base */ | |||
0, /* tp_dict */ | |||
0, /* tp_descr_get */ | |||
0, /* tp_descr_set */ | |||
0, /* tp_dictoffset */ | |||
(initproc) Tokenizer_init, /* tp_init */ | |||
0, /* tp_alloc */ | |||
Tokenizer_new, /* tp_new */ | |||
PyVarObject_HEAD_INIT(NULL, 0) /* header */ | |||
"_tokenizer.CTokenizer", /* tp_name */ | |||
sizeof(Tokenizer), /* tp_basicsize */ | |||
0, /* tp_itemsize */ | |||
(destructor) Tokenizer_dealloc, /* tp_dealloc */ | |||
0, /* tp_print */ | |||
0, /* tp_getattr */ | |||
0, /* tp_setattr */ | |||
0, /* tp_compare */ | |||
0, /* tp_repr */ | |||
0, /* tp_as_number */ | |||
0, /* tp_as_sequence */ | |||
0, /* tp_as_mapping */ | |||
0, /* tp_hash */ | |||
0, /* tp_call */ | |||
0, /* tp_str */ | |||
0, /* tp_getattro */ | |||
0, /* tp_setattro */ | |||
0, /* tp_as_buffer */ | |||
Py_TPFLAGS_DEFAULT, /* tp_flags */ | |||
"Creates a list of tokens from a string of wikicode.", /* tp_doc */ | |||
0, /* tp_traverse */ | |||
0, /* tp_clear */ | |||
0, /* tp_richcompare */ | |||
0, /* tp_weaklistoffset */ | |||
0, /* tp_iter */ | |||
0, /* tp_iternext */ | |||
Tokenizer_methods, /* tp_methods */ | |||
Tokenizer_members, /* tp_members */ | |||
0, /* tp_getset */ | |||
0, /* tp_base */ | |||
0, /* tp_dict */ | |||
0, /* tp_descr_get */ | |||
0, /* tp_descr_set */ | |||
0, /* tp_dictoffset */ | |||
(initproc) Tokenizer_init, /* tp_init */ | |||
0, /* tp_alloc */ | |||
Tokenizer_new, /* tp_new */ | |||
}; | |||
static PyModuleDef module_def = { | |||
PyModuleDef_HEAD_INIT, | |||
"_tokenizer", | |||
"Creates a list of tokens from a string of wikicode.", | |||
-1, NULL, NULL, NULL, NULL, NULL | |||
-1, | |||
NULL, | |||
NULL, | |||
NULL, | |||
NULL, | |||
NULL, | |||
}; |
@@ -24,56 +24,55 @@ SOFTWARE. | |||
/* Globals */ | |||
PyObject* Text; | |||
PyObject* TemplateOpen; | |||
PyObject* TemplateParamSeparator; | |||
PyObject* TemplateParamEquals; | |||
PyObject* TemplateClose; | |||
PyObject* ArgumentOpen; | |||
PyObject* ArgumentSeparator; | |||
PyObject* ArgumentClose; | |||
PyObject* WikilinkOpen; | |||
PyObject* WikilinkSeparator; | |||
PyObject* WikilinkClose; | |||
PyObject* ExternalLinkOpen; | |||
PyObject* ExternalLinkSeparator; | |||
PyObject* ExternalLinkClose; | |||
PyObject* HTMLEntityStart; | |||
PyObject* HTMLEntityNumeric; | |||
PyObject* HTMLEntityHex; | |||
PyObject* HTMLEntityEnd; | |||
PyObject* HeadingStart; | |||
PyObject* HeadingEnd; | |||
PyObject* CommentStart; | |||
PyObject* CommentEnd; | |||
PyObject* TagOpenOpen; | |||
PyObject* TagAttrStart; | |||
PyObject* TagAttrEquals; | |||
PyObject* TagAttrQuote; | |||
PyObject* TagCloseOpen; | |||
PyObject* TagCloseSelfclose; | |||
PyObject* TagOpenClose; | |||
PyObject* TagCloseClose; | |||
PyObject *Text; | |||
PyObject *TemplateOpen; | |||
PyObject *TemplateParamSeparator; | |||
PyObject *TemplateParamEquals; | |||
PyObject *TemplateClose; | |||
PyObject *ArgumentOpen; | |||
PyObject *ArgumentSeparator; | |||
PyObject *ArgumentClose; | |||
PyObject *WikilinkOpen; | |||
PyObject *WikilinkSeparator; | |||
PyObject *WikilinkClose; | |||
PyObject *ExternalLinkOpen; | |||
PyObject *ExternalLinkSeparator; | |||
PyObject *ExternalLinkClose; | |||
PyObject *HTMLEntityStart; | |||
PyObject *HTMLEntityNumeric; | |||
PyObject *HTMLEntityHex; | |||
PyObject *HTMLEntityEnd; | |||
PyObject *HeadingStart; | |||
PyObject *HeadingEnd; | |||
PyObject *CommentStart; | |||
PyObject *CommentEnd; | |||
PyObject *TagOpenOpen; | |||
PyObject *TagAttrStart; | |||
PyObject *TagAttrEquals; | |||
PyObject *TagAttrQuote; | |||
PyObject *TagCloseOpen; | |||
PyObject *TagCloseSelfclose; | |||
PyObject *TagOpenClose; | |||
PyObject *TagCloseClose; | |||
/* | |||
Load individual tokens into globals from the given Python module object. | |||
*/ | |||
void load_tokens_from_module(PyObject* module) | |||
void | |||
load_tokens_from_module(PyObject *module) | |||
{ | |||
Text = PyObject_GetAttrString(module, "Text"); | |||
TemplateOpen = PyObject_GetAttrString(module, "TemplateOpen"); | |||
TemplateParamSeparator = PyObject_GetAttrString(module, | |||
"TemplateParamSeparator"); | |||
TemplateParamEquals = PyObject_GetAttrString(module, | |||
"TemplateParamEquals"); | |||
TemplateParamSeparator = PyObject_GetAttrString(module, "TemplateParamSeparator"); | |||
TemplateParamEquals = PyObject_GetAttrString(module, "TemplateParamEquals"); | |||
TemplateClose = PyObject_GetAttrString(module, "TemplateClose"); | |||
ArgumentOpen = PyObject_GetAttrString(module, "ArgumentOpen"); | |||
@@ -85,8 +84,7 @@ void load_tokens_from_module(PyObject* module) | |||
WikilinkClose = PyObject_GetAttrString(module, "WikilinkClose"); | |||
ExternalLinkOpen = PyObject_GetAttrString(module, "ExternalLinkOpen"); | |||
ExternalLinkSeparator = PyObject_GetAttrString(module, | |||
"ExternalLinkSeparator"); | |||
ExternalLinkSeparator = PyObject_GetAttrString(module, "ExternalLinkSeparator"); | |||
ExternalLinkClose = PyObject_GetAttrString(module, "ExternalLinkClose"); | |||
HTMLEntityStart = PyObject_GetAttrString(module, "HTMLEntityStart"); | |||
@@ -26,44 +26,44 @@ SOFTWARE. | |||
/* Token globals */ | |||
extern PyObject* Text; | |||
extern PyObject* TemplateOpen; | |||
extern PyObject* TemplateParamSeparator; | |||
extern PyObject* TemplateParamEquals; | |||
extern PyObject* TemplateClose; | |||
extern PyObject* ArgumentOpen; | |||
extern PyObject* ArgumentSeparator; | |||
extern PyObject* ArgumentClose; | |||
extern PyObject* WikilinkOpen; | |||
extern PyObject* WikilinkSeparator; | |||
extern PyObject* WikilinkClose; | |||
extern PyObject* ExternalLinkOpen; | |||
extern PyObject* ExternalLinkSeparator; | |||
extern PyObject* ExternalLinkClose; | |||
extern PyObject* HTMLEntityStart; | |||
extern PyObject* HTMLEntityNumeric; | |||
extern PyObject* HTMLEntityHex; | |||
extern PyObject* HTMLEntityEnd; | |||
extern PyObject* HeadingStart; | |||
extern PyObject* HeadingEnd; | |||
extern PyObject* CommentStart; | |||
extern PyObject* CommentEnd; | |||
extern PyObject* TagOpenOpen; | |||
extern PyObject* TagAttrStart; | |||
extern PyObject* TagAttrEquals; | |||
extern PyObject* TagAttrQuote; | |||
extern PyObject* TagCloseOpen; | |||
extern PyObject* TagCloseSelfclose; | |||
extern PyObject* TagOpenClose; | |||
extern PyObject* TagCloseClose; | |||
extern PyObject *Text; | |||
extern PyObject *TemplateOpen; | |||
extern PyObject *TemplateParamSeparator; | |||
extern PyObject *TemplateParamEquals; | |||
extern PyObject *TemplateClose; | |||
extern PyObject *ArgumentOpen; | |||
extern PyObject *ArgumentSeparator; | |||
extern PyObject *ArgumentClose; | |||
extern PyObject *WikilinkOpen; | |||
extern PyObject *WikilinkSeparator; | |||
extern PyObject *WikilinkClose; | |||
extern PyObject *ExternalLinkOpen; | |||
extern PyObject *ExternalLinkSeparator; | |||
extern PyObject *ExternalLinkClose; | |||
extern PyObject *HTMLEntityStart; | |||
extern PyObject *HTMLEntityNumeric; | |||
extern PyObject *HTMLEntityHex; | |||
extern PyObject *HTMLEntityEnd; | |||
extern PyObject *HeadingStart; | |||
extern PyObject *HeadingEnd; | |||
extern PyObject *CommentStart; | |||
extern PyObject *CommentEnd; | |||
extern PyObject *TagOpenOpen; | |||
extern PyObject *TagAttrStart; | |||
extern PyObject *TagAttrEquals; | |||
extern PyObject *TagAttrQuote; | |||
extern PyObject *TagCloseOpen; | |||
extern PyObject *TagCloseSelfclose; | |||
extern PyObject *TagOpenClose; | |||
extern PyObject *TagCloseClose; | |||
/* Functions */ | |||
void load_tokens_from_module(PyObject*); | |||
void load_tokens_from_module(PyObject *);
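For reference, each global declared above caches a class defined in the Python
tokens module; load_tokens_from_module() is the C mirror of a plain getattr
loop. A minimal Python-side sketch of the same lookup (package paths as in
this repository):

    from mwparserfromhell.parser import tokens

    # The same lookups PyObject_GetAttrString() performs in C:
    for name in ("Text", "TemplateOpen", "ExternalLinkSeparator", "TagCloseClose"):
        cls = getattr(tokens, name)
        assert isinstance(cls, type) and issubclass(cls, dict)  # Token subclasses dict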
@@ -20,6 +20,7 @@ | |||
__all__ = ["ParserError"] | |||
class ParserError(Exception): | |||
"""Exception raised when an internal error occurs while parsing. | |||
@@ -28,6 +29,7 @@ class ParserError(Exception): | |||
with an impossible internal state and is bailing out before other problems | |||
can happen. Its appearance indicates a bug. | |||
""" | |||
def __init__(self, extra): | |||
msg = "This is a bug and should be reported. Info: {}.".format(extra) | |||
        super().__init__(msg)
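A hedged usage sketch for callers (import path per the file above;
ParserError signals a tokenizer bug rather than malformed input, so catching
it is mostly useful for logging and reporting):

    import mwparserfromhell
    from mwparserfromhell.parser.errors import ParserError

    try:
        code = mwparserfromhell.parse("{{foo|bar}}")
    except ParserError as exc:
        # Message reads "This is a bug and should be reported. Info: ...".
        print("parser bug, please report:", exc)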
@@ -1,4 +1,4 @@ | |||
# Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# Copyright (C) 2012-2021 Ben Kurtovic <ben.kurtovic@gmail.com> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -24,11 +24,17 @@ import re | |||
from . import contexts, tokens | |||
from .errors import ParserError | |||
from ..definitions import (get_html_tag, is_parsable, is_single, | |||
is_single_only, is_scheme) | |||
from ..definitions import ( | |||
get_html_tag, | |||
is_parsable, | |||
is_single, | |||
is_single_only, | |||
is_scheme, | |||
) | |||
__all__ = ["Tokenizer"] | |||
class BadRoute(Exception): | |||
"""Raised internally when the current tokenization route is invalid.""" | |||
@@ -39,14 +45,15 @@ class BadRoute(Exception): | |||
class _TagOpenData: | |||
"""Stores data about an HTML open tag, like ``<ref name="foo">``.""" | |||
CX_NAME = 1 << 0 | |||
CX_ATTR_READY = 1 << 1 | |||
CX_ATTR_NAME = 1 << 2 | |||
CX_ATTR_VALUE = 1 << 3 | |||
CX_QUOTED = 1 << 4 | |||
CX_NOTE_SPACE = 1 << 5 | |||
CX_NAME = 1 << 0 | |||
CX_ATTR_READY = 1 << 1 | |||
CX_ATTR_NAME = 1 << 2 | |||
CX_ATTR_VALUE = 1 << 3 | |||
CX_QUOTED = 1 << 4 | |||
CX_NOTE_SPACE = 1 << 5 | |||
CX_NOTE_EQUALS = 1 << 6 | |||
CX_NOTE_QUOTE = 1 << 7 | |||
CX_NOTE_QUOTE = 1 << 7 | |||
def __init__(self): | |||
self.context = self.CX_NAME | |||
@@ -57,11 +64,34 @@ class _TagOpenData: | |||
class Tokenizer: | |||
"""Creates a list of tokens from a string of wikicode.""" | |||
USES_C = False | |||
START = object() | |||
END = object() | |||
MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "'", "#", "*", ";", | |||
":", "/", "-", "!", "\n", START, END] | |||
MARKERS = [ | |||
"{", | |||
"}", | |||
"[", | |||
"]", | |||
"<", | |||
">", | |||
"|", | |||
"=", | |||
"&", | |||
"'", | |||
'"', | |||
"#", | |||
"*", | |||
";", | |||
":", | |||
"/", | |||
"-", | |||
"!", | |||
"\n", | |||
START, | |||
END, | |||
] | |||
URISCHEME = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-" | |||
MAX_DEPTH = 40 | |||
regex = re.compile(r"([{}\[\]<>|=&'#*;:/\\\"\-!\n])", flags=re.IGNORECASE) | |||
tag_splitter = re.compile(r"([\s\"\'\\]+)") | |||
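The new URISCHEME constant deduplicates a string literal that previously
appeared in both scheme parsers (see the hunks below); it is the set of
characters RFC 3986 allows in a scheme name. A quick sanity sketch:

    import string

    URISCHEME = string.ascii_uppercase + string.ascii_lowercase + string.digits + "+.-"
    assert all(ch in URISCHEME for ch in "https")
    assert all(ch in URISCHEME for ch in "svn+ssh")
    assert ":" not in URISCHEME  # the colon terminates the scheme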
@@ -323,7 +353,7 @@ class Tokenizer: | |||
self._head += 2 | |||
try: | |||
# If the wikilink looks like an external link, parse it as such: | |||
link, _extra, _delta = self._really_parse_external_link(True) | |||
link, _extra = self._really_parse_external_link(True) | |||
except BadRoute: | |||
self._head = reset + 1 | |||
try: | |||
@@ -366,8 +396,7 @@ class Tokenizer: | |||
self._emit_text("//") | |||
self._head += 2 | |||
else: | |||
valid = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-" | |||
all_valid = lambda: all(char in valid for char in self._read()) | |||
all_valid = lambda: all(char in self.URISCHEME for char in self._read()) | |||
scheme = "" | |||
while self._read() is not self.END and all_valid(): | |||
scheme += self._read() | |||
@@ -386,17 +415,16 @@ class Tokenizer: | |||
def _parse_free_uri_scheme(self): | |||
"""Parse the URI scheme of a free (no brackets) external link.""" | |||
valid = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-" | |||
scheme = [] | |||
try: | |||
# We have to backtrack through the textbuffer looking for our | |||
# scheme since it was just parsed as text: | |||
for chunk in reversed(self._textbuffer): | |||
for char in reversed(chunk): | |||
# stop at the first non-word character | |||
# Stop at the first non-word character | |||
if re.fullmatch(r"\W", char): | |||
raise StopIteration() | |||
if char not in valid: | |||
if char not in self.URISCHEME: | |||
raise BadRoute() | |||
scheme.append(char) | |||
except StopIteration: | |||
@@ -434,23 +462,26 @@ class Tokenizer: | |||
self._emit_text(this) | |||
return punct, tail | |||
def _is_free_link_end(self, this, nxt): | |||
"""Return whether the current head is the end of a free link.""" | |||
def _is_uri_end(self, this, nxt): | |||
"""Return whether the current head is the end of a URI.""" | |||
# Built from _parse()'s end sentinels: | |||
after, ctx = self._read(2), self._context | |||
equal_sign_contexts = contexts.TEMPLATE_PARAM_KEY | contexts.HEADING | |||
return (this in (self.END, "\n", "[", "]", "<", ">", "\"") or | |||
this == nxt == "'" or | |||
(this == "|" and ctx & contexts.TEMPLATE) or | |||
(this == "=" and ctx & equal_sign_contexts) or | |||
(this == nxt == "}" and ctx & contexts.TEMPLATE) or | |||
(this == nxt == after == "}" and ctx & contexts.ARGUMENT)) | |||
return ( | |||
this in (self.END, "\n", "[", "]", "<", ">", '"') | |||
or " " in this | |||
or this == nxt == "'" | |||
or (this == "|" and ctx & contexts.TEMPLATE) | |||
or (this == "=" and ctx & (contexts.TEMPLATE_PARAM_KEY | contexts.HEADING)) | |||
or (this == nxt == "}" and ctx & contexts.TEMPLATE) | |||
or (this == nxt == after == "}" and ctx & contexts.ARGUMENT) | |||
) | |||
def _really_parse_external_link(self, brackets): | |||
"""Really parse an external link.""" | |||
if brackets: | |||
self._parse_bracketed_uri_scheme() | |||
invalid = ("\n", " ", "]") | |||
punct = () | |||
else: | |||
self._parse_free_uri_scheme() | |||
invalid = ("\n", " ", "[", "]") | |||
@@ -465,53 +496,47 @@ class Tokenizer: | |||
self._emit_text(tail) | |||
tail = "" | |||
self._parse_entity() | |||
elif (this == "<" and nxt == "!" and self._read(2) == | |||
self._read(3) == "-"): | |||
elif this == "<" and nxt == "!" and self._read(2) == self._read(3) == "-": | |||
if tail: | |||
self._emit_text(tail) | |||
tail = "" | |||
self._parse_comment() | |||
elif not brackets and self._is_free_link_end(this, nxt): | |||
return self._pop(), tail, -1 | |||
elif this is self.END or this == "\n": | |||
self._fail_route() | |||
elif this == nxt == "{" and self._can_recurse(): | |||
if tail: | |||
self._emit_text(tail) | |||
tail = "" | |||
self._parse_template_or_argument() | |||
elif this == "]": | |||
return self._pop(), tail, 0 | |||
elif this == "'" and nxt == "'": | |||
separator = tokens.ExternalLinkSeparator() | |||
separator.suppress_space = True | |||
self._emit(separator) | |||
self._context ^= contexts.EXT_LINK_URI | |||
self._context |= contexts.EXT_LINK_TITLE | |||
return self._parse(push=False), None, 0 | |||
elif any(ch in this for ch in (" ", "\n", "[", "]", "<", ">", | |||
"\"")): | |||
before, after = re.split(r"[ \n[\]<>\"]", this, maxsplit=1) | |||
delimiter = this[len(before)] | |||
if brackets: | |||
self._emit_text(before) | |||
separator = tokens.ExternalLinkSeparator() | |||
if delimiter != " ": | |||
elif brackets: | |||
if this is self.END or this == "\n": | |||
self._fail_route() | |||
if this == "]": | |||
return self._pop(), None | |||
if self._is_uri_end(this, nxt): | |||
if " " in this: | |||
before, after = this.split(" ", 1) | |||
self._emit_text(before) | |||
self._emit(tokens.ExternalLinkSeparator()) | |||
if after: | |||
self._emit_text(after) | |||
self._head += 1 | |||
else: | |||
separator = tokens.ExternalLinkSeparator() | |||
separator.suppress_space = True | |||
self._emit(separator) | |||
if after: | |||
self._emit_text(after) | |||
self._emit(separator) | |||
self._context ^= contexts.EXT_LINK_URI | |||
self._context |= contexts.EXT_LINK_TITLE | |||
if delimiter == " ": | |||
self._head += 1 | |||
return self._parse(push=False), None, 0 | |||
punct, tail = self._handle_free_link_text(punct, tail, before) | |||
return self._pop(), tail + " " + after, 0 | |||
elif not brackets: | |||
punct, tail = self._handle_free_link_text(punct, tail, this) | |||
else: | |||
return self._parse(push=False), None | |||
self._emit_text(this) | |||
else: | |||
if self._is_uri_end(this, nxt): | |||
if this is not self.END and " " in this: | |||
before, after = this.split(" ", 1) | |||
punct, tail = self._handle_free_link_text(punct, tail, before) | |||
tail += " " + after | |||
else: | |||
self._head -= 1 | |||
return self._pop(), tail | |||
punct, tail = self._handle_free_link_text(punct, tail, this) | |||
self._head += 1 | |||
def _remove_uri_scheme_from_textbuffer(self, scheme): | |||
@@ -536,7 +561,7 @@ class Tokenizer: | |||
reset = self._head | |||
self._head += 1 | |||
try: | |||
link, extra, delta = self._really_parse_external_link(brackets) | |||
link, extra = self._really_parse_external_link(brackets) | |||
except BadRoute: | |||
self._head = reset | |||
if not brackets and self._context & contexts.DL_TERM: | |||
@@ -550,7 +575,6 @@ class Tokenizer: | |||
self._emit(tokens.ExternalLinkOpen(brackets=brackets)) | |||
self._emit_all(link) | |||
self._emit(tokens.ExternalLinkClose()) | |||
self._head += delta | |||
if extra: | |||
self._emit_text(extra) | |||
@@ -688,9 +712,13 @@ class Tokenizer: | |||
self._emit_first(tokens.TagAttrQuote(char=data.quoter)) | |||
self._emit_all(self._pop()) | |||
buf = data.padding_buffer | |||
self._emit_first(tokens.TagAttrStart( | |||
pad_first=buf["first"], pad_before_eq=buf["before_eq"], | |||
pad_after_eq=buf["after_eq"])) | |||
self._emit_first( | |||
tokens.TagAttrStart( | |||
pad_first=buf["first"], | |||
pad_before_eq=buf["before_eq"], | |||
pad_after_eq=buf["after_eq"], | |||
) | |||
) | |||
self._emit_all(self._pop()) | |||
for key in data.padding_buffer: | |||
data.padding_buffer[key] = "" | |||
@@ -698,7 +726,9 @@ class Tokenizer: | |||
def _handle_tag_space(self, data, text): | |||
"""Handle whitespace (*text*) inside of an HTML open tag.""" | |||
ctx = data.context | |||
end_of_value = ctx & data.CX_ATTR_VALUE and not ctx & (data.CX_QUOTED | data.CX_NOTE_QUOTE) | |||
end_of_value = ctx & data.CX_ATTR_VALUE and not ctx & ( | |||
data.CX_QUOTED | data.CX_NOTE_QUOTE | |||
) | |||
if end_of_value or (ctx & data.CX_QUOTED and ctx & data.CX_NOTE_SPACE): | |||
self._push_tag_buffer(data) | |||
data.context = data.CX_ATTR_READY | |||
@@ -799,8 +829,10 @@ class Tokenizer: | |||
"""Handle the ending of a closing tag (``</foo>``).""" | |||
strip = lambda tok: tok.text.rstrip().lower() | |||
closing = self._pop() | |||
if len(closing) != 1 or (not isinstance(closing[0], tokens.Text) or | |||
strip(closing[0]) != strip(self._stack[1])): | |||
if len(closing) != 1 or ( | |||
not isinstance(closing[0], tokens.Text) | |||
or strip(closing[0]) != strip(self._stack[1]) | |||
): | |||
self._fail_route() | |||
self._emit_all(closing) | |||
self._emit(tokens.TagCloseClose()) | |||
@@ -815,8 +847,9 @@ class Tokenizer: | |||
self._fail_route() | |||
elif this == "<" and nxt == "/": | |||
self._head += 3 | |||
if self._read() != ">" or (strip(self._read(-1)) != | |||
strip(self._stack[1].text)): | |||
if self._read() != ">" or ( | |||
strip(self._read(-1)) != strip(self._stack[1].text) | |||
): | |||
self._head -= 1 | |||
self._emit_text("</") | |||
continue | |||
@@ -854,8 +887,8 @@ class Tokenizer: | |||
depth -= 1 | |||
if depth == 0: # pragma: no cover (untestable/exceptional) | |||
raise ParserError( | |||
"_handle_single_tag_end() got an unexpected " | |||
"TagCloseSelfclose") | |||
"_handle_single_tag_end() got an unexpected TagCloseSelfclose" | |||
) | |||
else: # pragma: no cover (untestable/exceptional case) | |||
raise ParserError("_handle_single_tag_end() missed a TagCloseOpen") | |||
padding = stack[index].padding | |||
@@ -869,8 +902,10 @@ class Tokenizer: | |||
self._emit(tokens.TagOpenOpen()) | |||
while True: | |||
this, nxt = self._read(), self._read(1) | |||
can_exit = (not data.context & (data.CX_QUOTED | data.CX_NAME) or | |||
data.context & data.CX_NOTE_SPACE) | |||
can_exit = ( | |||
not data.context & (data.CX_QUOTED | data.CX_NAME) | |||
or data.context & data.CX_NOTE_SPACE | |||
) | |||
if this is self.END: | |||
if self._context & contexts.TAG_ATTR: | |||
if data.context & data.CX_QUOTED: | |||
@@ -1086,16 +1121,25 @@ class Tokenizer: | |||
else: | |||
self._emit_text("\n") | |||
def _emit_table_tag(self, open_open_markup, tag, style, padding, | |||
close_open_markup, contents, open_close_markup): | |||
def _emit_table_tag( | |||
self, | |||
open_open_markup, | |||
tag, | |||
style, | |||
padding, | |||
close_open_markup, | |||
contents, | |||
open_close_markup, | |||
): | |||
"""Emit a table tag.""" | |||
self._emit(tokens.TagOpenOpen(wiki_markup=open_open_markup)) | |||
self._emit_text(tag) | |||
if style: | |||
self._emit_all(style) | |||
if close_open_markup: | |||
self._emit(tokens.TagCloseOpen(wiki_markup=close_open_markup, | |||
padding=padding)) | |||
self._emit( | |||
tokens.TagCloseOpen(wiki_markup=close_open_markup, padding=padding) | |||
) | |||
else: | |||
self._emit(tokens.TagCloseOpen(padding=padding)) | |||
if contents: | |||
@@ -1110,8 +1154,9 @@ class Tokenizer: | |||
data.context = _TagOpenData.CX_ATTR_READY | |||
while True: | |||
this = self._read() | |||
can_exit = (not data.context & data.CX_QUOTED or | |||
data.context & data.CX_NOTE_SPACE) | |||
can_exit = ( | |||
not data.context & data.CX_QUOTED or data.context & data.CX_NOTE_SPACE | |||
) | |||
if this == end_token and can_exit: | |||
if data.context & (data.CX_ATTR_NAME | data.CX_ATTR_VALUE): | |||
self._push_tag_buffer(data) | |||
@@ -1194,30 +1239,34 @@ class Tokenizer: | |||
self._head -= 1 | |||
return | |||
cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | | |||
line_context | contexts.TABLE_CELL_STYLE) | |||
cell = self._parse( | |||
contexts.TABLE_OPEN | |||
| contexts.TABLE_CELL_OPEN | |||
| line_context | |||
| contexts.TABLE_CELL_STYLE | |||
) | |||
cell_context = self._context | |||
self._context = old_context | |||
reset_for_style = cell_context & contexts.TABLE_CELL_STYLE | |||
if reset_for_style: | |||
self._head = reset | |||
self._push(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | | |||
line_context) | |||
self._push(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context) | |||
padding = self._handle_table_style("|") | |||
style = self._pop() | |||
# Don't parse the style separator: | |||
self._head += 1 | |||
cell = self._parse(contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | | |||
line_context) | |||
cell = self._parse( | |||
contexts.TABLE_OPEN | contexts.TABLE_CELL_OPEN | line_context | |||
) | |||
cell_context = self._context | |||
self._context = old_context | |||
close_open_markup = "|" if reset_for_style else None | |||
self._emit_table_tag(markup, tag, style, padding, close_open_markup, | |||
cell, "") | |||
self._emit_table_tag(markup, tag, style, padding, close_open_markup, cell, "") | |||
# Keep header/cell line contexts: | |||
self._context |= cell_context & (contexts.TABLE_TH_LINE | | |||
contexts.TABLE_TD_LINE) | |||
self._context |= cell_context & ( | |||
contexts.TABLE_TH_LINE | contexts.TABLE_TD_LINE | |||
) | |||
# Offset displacement done by parse(): | |||
self._head -= 1 | |||
@@ -1340,7 +1389,11 @@ class Tokenizer: | |||
elif this == "|" and self._context & contexts.TEMPLATE: | |||
self._handle_template_param() | |||
elif this == "=" and self._context & contexts.TEMPLATE_PARAM_KEY: | |||
if not self._global & contexts.GL_HEADING and self._read(-1) in ("\n", self.START) and nxt == "=": | |||
if ( | |||
not self._global & contexts.GL_HEADING | |||
and self._read(-1) in ("\n", self.START) | |||
and nxt == "=" | |||
): | |||
self._parse_heading() | |||
else: | |||
self._handle_template_param_value() | |||
@@ -1369,7 +1422,11 @@ class Tokenizer: | |||
self._parse_external_link(False) | |||
elif this == "]" and self._context & contexts.EXT_LINK_TITLE: | |||
return self._pop() | |||
elif this == "=" and not self._global & contexts.GL_HEADING and not self._context & contexts.TEMPLATE: | |||
elif ( | |||
this == "=" | |||
and not self._global & contexts.GL_HEADING | |||
and not self._context & contexts.TEMPLATE | |||
): | |||
if self._read(-1) in ("\n", self.START): | |||
self._parse_heading() | |||
else: | |||
@@ -1404,7 +1461,8 @@ class Tokenizer: | |||
elif self._read(-1) in ("\n", self.START) and this in ("#", "*", ";", ":"): | |||
self._handle_list() | |||
elif self._read(-1) in ("\n", self.START) and ( | |||
this == nxt == self._read(2) == self._read(3) == "-"): | |||
this == nxt == self._read(2) == self._read(3) == "-" | |||
): | |||
self._handle_hr() | |||
elif this in ("\n", ":") and self._context & contexts.DL_TERM: | |||
self._handle_dl_term() | |||
@@ -1412,9 +1470,17 @@ class Tokenizer: | |||
# Kill potential table contexts | |||
self._context &= ~contexts.TABLE_CELL_LINE_CONTEXTS | |||
# Start of table parsing | |||
elif this == "{" and nxt == "|" and ( | |||
self._read(-1) in ("\n", self.START) or | |||
(self._read(-2) in ("\n", self.START) and self._read(-1).isspace())): | |||
elif ( | |||
this == "{" | |||
and nxt == "|" | |||
and ( | |||
self._read(-1) in ("\n", self.START) | |||
or ( | |||
self._read(-2) in ("\n", self.START) | |||
and self._read(-1).isspace() | |||
) | |||
) | |||
): | |||
if self._can_recurse(): | |||
self._parse_table() | |||
else: | |||
@@ -1438,8 +1504,9 @@ class Tokenizer: | |||
elif this == "\n" and self._context & contexts.TABLE_CELL_LINE_CONTEXTS: | |||
self._context &= ~contexts.TABLE_CELL_LINE_CONTEXTS | |||
self._emit_text(this) | |||
elif (self._read(-1) in ("\n", self.START) or | |||
(self._read(-2) in ("\n", self.START) and self._read(-1).isspace())): | |||
elif self._read(-1) in ("\n", self.START) or ( | |||
self._read(-2) in ("\n", self.START) and self._read(-1).isspace() | |||
): | |||
if this == "|" and nxt == "}": | |||
if self._context & contexts.TABLE_CELL_OPEN: | |||
return self._handle_table_cell_end() | |||
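End of the tokenizer changes. The external-link hunks above simplify
_really_parse_external_link() to return (link, extra) instead of
(link, extra, delta), with head adjustment handled inside the loop itself. To
watch the resulting token stream, a minimal sketch (assuming the pure-Python
tokenizer's public tokenize() entry point, which is not shown in these hunks):

    from mwparserfromhell.parser.tokenizer import Tokenizer

    for tok in Tokenizer().tokenize("[http://example.com/ Example]"):
        # Expected shape: ExternalLinkOpen, Text (URL), ExternalLinkSeparator,
        # Text (title), ExternalLinkClose
        print(type(tok).__name__, dict(tok))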
@@ -28,6 +28,7 @@ the :class:`.Wikicode` tree by the :class:`.Builder`.
__all__ = ["Token"] | |||
class Token(dict): | |||
"""A token stores the semantic meaning of a unit of wikicode.""" | |||
@@ -61,43 +62,44 @@ def make(name): | |||
__all__.append(name) | |||
return type(name, (Token,), {}) | |||
Text = make("Text") | |||
TemplateOpen = make("TemplateOpen") # {{ | |||
TemplateParamSeparator = make("TemplateParamSeparator") # | | |||
TemplateParamEquals = make("TemplateParamEquals") # = | |||
TemplateClose = make("TemplateClose") # }} | |||
TemplateOpen = make("TemplateOpen") # {{ | |||
TemplateParamSeparator = make("TemplateParamSeparator") # | | |||
TemplateParamEquals = make("TemplateParamEquals") # = | |||
TemplateClose = make("TemplateClose") # }} | |||
ArgumentOpen = make("ArgumentOpen") # {{{ | |||
ArgumentSeparator = make("ArgumentSeparator") # | | |||
ArgumentClose = make("ArgumentClose") # }}} | |||
ArgumentOpen = make("ArgumentOpen") # {{{ | |||
ArgumentSeparator = make("ArgumentSeparator") # | | |||
ArgumentClose = make("ArgumentClose") # }}} | |||
WikilinkOpen = make("WikilinkOpen") # [[ | |||
WikilinkSeparator = make("WikilinkSeparator") # | | |||
WikilinkClose = make("WikilinkClose") # ]] | |||
WikilinkOpen = make("WikilinkOpen") # [[ | |||
WikilinkSeparator = make("WikilinkSeparator") # | | |||
WikilinkClose = make("WikilinkClose") # ]] | |||
ExternalLinkOpen = make("ExternalLinkOpen") # [ | |||
ExternalLinkSeparator = make("ExternalLinkSeparator") # | |||
ExternalLinkClose = make("ExternalLinkClose") # ] | |||
ExternalLinkOpen = make("ExternalLinkOpen") # [ | |||
ExternalLinkSeparator = make("ExternalLinkSeparator") # | |||
ExternalLinkClose = make("ExternalLinkClose") # ] | |||
HTMLEntityStart = make("HTMLEntityStart") # & | |||
HTMLEntityNumeric = make("HTMLEntityNumeric") # # | |||
HTMLEntityHex = make("HTMLEntityHex") # x | |||
HTMLEntityEnd = make("HTMLEntityEnd") # ; | |||
HTMLEntityStart = make("HTMLEntityStart") # & | |||
HTMLEntityNumeric = make("HTMLEntityNumeric") # # | |||
HTMLEntityHex = make("HTMLEntityHex") # x | |||
HTMLEntityEnd = make("HTMLEntityEnd") # ; | |||
HeadingStart = make("HeadingStart") # =... | |||
HeadingEnd = make("HeadingEnd") # =... | |||
HeadingStart = make("HeadingStart") # =... | |||
HeadingEnd = make("HeadingEnd") # =... | |||
CommentStart = make("CommentStart") # <!-- | |||
CommentEnd = make("CommentEnd") # --> | |||
CommentStart = make("CommentStart") # <!-- | |||
CommentEnd = make("CommentEnd") # --> | |||
TagOpenOpen = make("TagOpenOpen") # < | |||
TagOpenOpen = make("TagOpenOpen") # < | |||
TagAttrStart = make("TagAttrStart") | |||
TagAttrEquals = make("TagAttrEquals") # = | |||
TagAttrQuote = make("TagAttrQuote") # ", ' | |||
TagCloseOpen = make("TagCloseOpen") # > | |||
TagCloseSelfclose = make("TagCloseSelfclose") # /> | |||
TagOpenClose = make("TagOpenClose") # </ | |||
TagCloseClose = make("TagCloseClose") # > | |||
TagAttrEquals = make("TagAttrEquals") # = | |||
TagAttrQuote = make("TagAttrQuote") # ", ' | |||
TagCloseOpen = make("TagCloseOpen") # > | |||
TagCloseSelfclose = make("TagCloseSelfclose") # /> | |||
TagOpenClose = make("TagOpenClose") # </ | |||
TagCloseClose = make("TagCloseClose") # > | |||
del make
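The realigned comments above record each token's wiki markup. Tokens are dict
subclasses built with keyword arguments and read back via attribute access
(both visible in the tokenizer hunks, e.g. tokens.TagAttrStart(pad_first=...)
and tok.text). A small sketch:

    from mwparserfromhell.parser import tokens

    text = tokens.Text(value="hello")
    assert text.value == "hello"           # attribute access reads the dict entry
    assert dict(text) == {"value": "hello"}
    sep = tokens.TemplateParamSeparator()  # the "|" between template params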
@@ -167,7 +167,7 @@ class ListProxy(_SliceNormalizerMixIn, list): | |||
def _render(self): | |||
"""Return the actual list from the stored start/stop/step.""" | |||
return list(self._parent)[self._start:self._stop:self._step] | |||
return list(self._parent)[self._start : self._stop : self._step] | |||
@inheritdoc | |||
def append(self, item): | |||
@@ -187,7 +187,7 @@ class ListProxy(_SliceNormalizerMixIn, list): | |||
@inheritdoc | |||
def extend(self, item): | |||
self._parent[self._stop:self._stop] = item | |||
self._parent[self._stop : self._stop] = item | |||
@inheritdoc | |||
def insert(self, index, item): | |||
@@ -215,7 +215,7 @@ class ListProxy(_SliceNormalizerMixIn, list): | |||
def reverse(self): | |||
item = self._render() | |||
item.reverse() | |||
self._parent[self._start:self._stop:self._step] = item | |||
self._parent[self._start : self._stop : self._step] = item | |||
@inheritdoc | |||
def sort(self, key=None, reverse=None): | |||
@@ -226,4 +226,4 @@ class ListProxy(_SliceNormalizerMixIn, list): | |||
if reverse is not None: | |||
kwargs["reverse"] = reverse | |||
item.sort(**kwargs) | |||
self._parent[self._start:self._stop:self._step] = item | |||
        self._parent[self._start : self._stop : self._step] = item
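Black spaces out complex slice expressions per PEP 8; behavior is unchanged.
A ListProxy remains a live view of its parent SmartList, as the test suite
further down exercises:

    from mwparserfromhell.smart_list import SmartList

    parent = SmartList([0, 1, 2, 3])
    child = parent[2:]                   # ListProxy: shares storage with parent
    parent.append(4)
    assert child == [2, 3, 4]            # parent mutations reach the child...
    child.append(5)
    assert parent == [0, 1, 2, 3, 4, 5]  # ...and child mutations reach the parent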
@@ -27,6 +27,7 @@ from sys import getdefaultencoding | |||
__all__ = ["StringMixIn"] | |||
def inheritdoc(method): | |||
"""Set __doc__ of *method* to __doc__ of *method* in its parent class. | |||
@@ -36,6 +37,7 @@ def inheritdoc(method): | |||
method.__doc__ = getattr(str, method.__name__).__doc__ | |||
return method | |||
class StringMixIn: | |||
"""Implement the interface for ``str`` in a dynamic manner. | |||
@@ -92,8 +94,9 @@ class StringMixIn: | |||
def __getattr__(self, attr): | |||
if not hasattr(str, attr): | |||
raise AttributeError("{!r} object has no attribute {!r}".format( | |||
type(self).__name__, attr)) | |||
raise AttributeError( | |||
"{!r} object has no attribute {!r}".format(type(self).__name__, attr) | |||
) | |||
return getattr(self.__str__(), attr) | |||
maketrans = str.maketrans # Static method can't rely on __getattr__ | |||
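A minimal subclass, mirroring the _FakeString helper in the tests further
down: only __str__ must be supplied, and __getattr__ forwards everything else
to the rendered string:

    from mwparserfromhell.string_mixin import StringMixIn

    class Shout(StringMixIn):
        def __init__(self, data):
            self._data = data

        def __str__(self):
            return self._data.upper()

    s = Shout("wiki")
    assert s.startswith("WIK")  # getattr(str(self), "startswith") under the hood
    assert s.lower() == "wiki"
    assert len(s) == 4          # the mixin implements the remaining str magics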
@@ -25,6 +25,7 @@ users generally won't need stuff from here. | |||
__all__ = ["parse_anything"] | |||
def parse_anything(value, context=0, skip_style_tags=False): | |||
"""Return a :class:`.Wikicode` for *value*, allowing multiple types. | |||
@@ -64,6 +65,8 @@ def parse_anything(value, context=0, skip_style_tags=False): | |||
nodelist += parse_anything(item, context, skip_style_tags).nodes | |||
return Wikicode(nodelist) | |||
except TypeError as exc: | |||
error = ("Needs string, Node, Wikicode, file, int, None, or " | |||
"iterable of these, but got {0}: {1}") | |||
error = ( | |||
"Needs string, Node, Wikicode, file, int, None, or " | |||
"iterable of these, but got {0}: {1}" | |||
) | |||
        raise ValueError(error.format(type(value).__name__, value)) from exc
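A sketch of the inputs that error message enumerates (ints are parsed via
str(), None contributes nothing, and iterables are flattened):

    from mwparserfromhell.utils import parse_anything

    code = parse_anything(["foo", 42, None, parse_anything("{{bar}}")])
    assert str(code) == "foo42{{bar}}"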
@@ -21,8 +21,18 @@ | |||
import re | |||
from itertools import chain | |||
from .nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity, | |||
Node, Tag, Template, Text, Wikilink) | |||
from .nodes import ( | |||
Argument, | |||
Comment, | |||
ExternalLink, | |||
Heading, | |||
HTMLEntity, | |||
Node, | |||
Tag, | |||
Template, | |||
Text, | |||
Wikilink, | |||
) | |||
from .smart_list.list_proxy import ListProxy | |||
from .string_mixin import StringMixIn | |||
from .utils import parse_anything | |||
@@ -31,6 +41,7 @@ __all__ = ["Wikicode"] | |||
FLAGS = re.IGNORECASE | re.DOTALL | re.UNICODE | |||
class Wikicode(StringMixIn): | |||
"""A ``Wikicode`` is a container for nodes that operates like a string. | |||
@@ -41,6 +52,7 @@ class Wikicode(StringMixIn): | |||
<ifilter>` series of functions is very useful for extracting and iterating | |||
over, for example, all of the templates in the object. | |||
""" | |||
RECURSE_OTHERS = 2 | |||
def __init__(self, nodes): | |||
@@ -82,8 +94,9 @@ class Wikicode(StringMixIn): | |||
return lambda obj: re.search(matches, str(obj), flags) | |||
return lambda obj: True | |||
def _indexed_ifilter(self, recursive=True, matches=None, flags=FLAGS, | |||
forcetype=None): | |||
def _indexed_ifilter( | |||
self, recursive=True, matches=None, flags=FLAGS, forcetype=None | |||
): | |||
"""Iterate over nodes and their corresponding indices in the node list. | |||
The arguments are interpreted as for :meth:`ifilter`. For each tuple | |||
@@ -94,9 +107,11 @@ class Wikicode(StringMixIn): | |||
match = self._build_matcher(matches, flags) | |||
if recursive: | |||
restrict = forcetype if recursive == self.RECURSE_OTHERS else None | |||
def getter(i, node): | |||
for ch in self._get_children(node, restrict=restrict): | |||
yield (i, ch) | |||
inodes = chain(*(getter(i, n) for i, n in enumerate(self.nodes))) | |||
else: | |||
inodes = enumerate(self.nodes) | |||
@@ -106,6 +121,7 @@ class Wikicode(StringMixIn): | |||
def _is_child_wikicode(self, obj, recursive=True): | |||
"""Return whether the given :class:`.Wikicode` is a descendant.""" | |||
def deref(nodes): | |||
if isinstance(nodes, ListProxy): | |||
return nodes._parent # pylint: disable=protected-access | |||
@@ -210,6 +226,7 @@ class Wikicode(StringMixIn): | |||
should be any object that can be tested for with ``is``. *indent* is | |||
the starting indentation. | |||
""" | |||
def write(*args): | |||
"""Write a new line following the proper indentation rules.""" | |||
if lines and lines[-1] is marker: # Continue from the last line | |||
@@ -243,10 +260,12 @@ class Wikicode(StringMixIn): | |||
This is equivalent to :meth:`{1}` with *forcetype* set to | |||
:class:`~{2.__module__}.{2.__name__}`. | |||
""" | |||
make_ifilter = lambda ftype: (lambda self, *a, **kw: | |||
self.ifilter(forcetype=ftype, *a, **kw)) | |||
make_filter = lambda ftype: (lambda self, *a, **kw: | |||
self.filter(forcetype=ftype, *a, **kw)) | |||
make_ifilter = lambda ftype: ( | |||
lambda self, *a, **kw: self.ifilter(forcetype=ftype, *a, **kw) | |||
) | |||
make_filter = lambda ftype: ( | |||
lambda self, *a, **kw: self.filter(forcetype=ftype, *a, **kw) | |||
) | |||
for name, ftype in meths.items(): | |||
ifilt = make_ifilter(ftype) | |||
filt = make_filter(ftype) | |||
@@ -342,6 +361,7 @@ class Wikicode(StringMixIn): | |||
Will return an empty list if *obj* is at the top level of this Wikicode | |||
object. Will raise :exc:`ValueError` if it wasn't found. | |||
""" | |||
def _get_ancestors(code, needle): | |||
for node in code.nodes: | |||
if node is needle: | |||
@@ -510,8 +530,7 @@ class Wikicode(StringMixIn): | |||
return True | |||
return False | |||
def ifilter(self, recursive=True, matches=None, flags=FLAGS, | |||
forcetype=None): | |||
def ifilter(self, recursive=True, matches=None, flags=FLAGS, forcetype=None): | |||
"""Iterate over nodes in our list matching certain conditions. | |||
If *forcetype* is given, only nodes that are instances of this type (or | |||
@@ -545,8 +564,15 @@ class Wikicode(StringMixIn): | |||
""" | |||
return list(self.ifilter(*args, **kwargs)) | |||
def get_sections(self, levels=None, matches=None, flags=FLAGS, flat=False, | |||
include_lead=None, include_headings=True): | |||
def get_sections( | |||
self, | |||
levels=None, | |||
matches=None, | |||
flags=FLAGS, | |||
flat=False, | |||
include_lead=None, | |||
include_headings=True, | |||
): | |||
"""Return a list of sections within the page. | |||
Sections are returned as :class:`.Wikicode` objects with a shared node | |||
@@ -568,12 +594,14 @@ class Wikicode(StringMixIn): | |||
:class:`.Heading` object will be included; otherwise, this is skipped. | |||
""" | |||
title_matcher = self._build_matcher(matches, flags) | |||
matcher = lambda heading: (title_matcher(heading.title) and | |||
(not levels or heading.level in levels)) | |||
matcher = lambda heading: ( | |||
title_matcher(heading.title) and (not levels or heading.level in levels) | |||
) | |||
iheadings = self._indexed_ifilter(recursive=False, forcetype=Heading) | |||
sections = [] # Tuples of (index_of_first_node, section) | |||
open_headings = [] # Tuples of (index, heading), where index and | |||
# heading.level are both monotonically increasing | |||
# Tuples of (index, heading), where index and heading.level are both | |||
# monotonically increasing | |||
open_headings = [] | |||
# Add the lead section if appropriate: | |||
if include_lead or not (include_lead is not None or matches or levels): | |||
@@ -610,8 +638,7 @@ class Wikicode(StringMixIn): | |||
# Ensure that earlier sections are earlier in the returned list: | |||
return [section for i, section in sorted(sections)] | |||
def strip_code(self, normalize=True, collapse=True, | |||
keep_template_params=False): | |||
def strip_code(self, normalize=True, collapse=True, keep_template_params=False): | |||
"""Return a rendered string without unprintable code such as templates. | |||
The way a node is stripped is handled by the | |||
@@ -631,7 +658,7 @@ class Wikicode(StringMixIn): | |||
kwargs = { | |||
"normalize": normalize, | |||
"collapse": collapse, | |||
"keep_template_params": keep_template_params | |||
"keep_template_params": keep_template_params, | |||
} | |||
nodes = [] | |||
@@ -673,7 +700,15 @@ class Wikicode(StringMixIn): | |||
marker = object() # Random object we can find with certainty in a list | |||
return "\n".join(self._get_tree(self, [], marker, 0)) | |||
Wikicode._build_filter_methods( | |||
arguments=Argument, comments=Comment, external_links=ExternalLink, | |||
headings=Heading, html_entities=HTMLEntity, tags=Tag, templates=Template, | |||
text=Text, wikilinks=Wikilink) | |||
arguments=Argument, | |||
comments=Comment, | |||
external_links=ExternalLink, | |||
headings=Heading, | |||
html_entities=HTMLEntity, | |||
tags=Tag, | |||
templates=Template, | |||
text=Text, | |||
wikilinks=Wikilink, | |||
)
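For each keyword passed to _build_filter_methods(), the class grows a lazy
ifilter_* generator and an eager filter_* list method (see the docstring
template in the earlier hunk). For example:

    import mwparserfromhell

    code = mwparserfromhell.parse("Intro {{tmpl|x=1}} with a [[Link]].")
    assert [str(t) for t in code.filter_templates()] == ["{{tmpl|x=1}}"]
    assert str(next(code.ifilter_wikilinks())) == "[[Link]]"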
@@ -18,14 +18,24 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from mwparserfromhell.nodes import (Argument, Comment, ExternalLink, Heading, | |||
HTMLEntity, Tag, Template, Text, Wikilink) | |||
from mwparserfromhell.nodes import ( | |||
Argument, | |||
Comment, | |||
ExternalLink, | |||
Heading, | |||
HTMLEntity, | |||
Tag, | |||
Template, | |||
Text, | |||
Wikilink, | |||
) | |||
from mwparserfromhell.smart_list import SmartList | |||
from mwparserfromhell.wikicode import Wikicode | |||
wrap = lambda L: Wikicode(SmartList(L)) | |||
wraptext = lambda *args: wrap([Text(t) for t in args]) | |||
def _assert_node_equal(expected, actual): | |||
"""Assert that two Nodes have the same type and have the same data.""" | |||
registry = { | |||
@@ -43,6 +53,7 @@ def _assert_node_equal(expected, actual): | |||
assert type(expected) == type(actual) | |||
registry[type(expected)](expected, actual) | |||
def _assert_argument_node_equal(expected, actual): | |||
"""Assert that two Argument nodes have the same data.""" | |||
assert_wikicode_equal(expected.name, actual.name) | |||
@@ -51,10 +62,12 @@ def _assert_argument_node_equal(expected, actual): | |||
else: | |||
assert actual.default is None | |||
def _assert_comment_node_equal(expected, actual): | |||
"""Assert that two Comment nodes have the same data.""" | |||
assert expected.contents == actual.contents | |||
def _assert_external_link_node_equal(expected, actual): | |||
"""Assert that two ExternalLink nodes have the same data.""" | |||
assert_wikicode_equal(expected.url, actual.url) | |||
@@ -65,11 +78,13 @@ def _assert_external_link_node_equal(expected, actual): | |||
assert expected.brackets is actual.brackets | |||
assert expected.suppress_space is actual.suppress_space | |||
def _assert_heading_node_equal(expected, actual): | |||
"""Assert that two Heading nodes have the same data.""" | |||
assert_wikicode_equal(expected.title, actual.title) | |||
assert expected.level == actual.level | |||
def _assert_html_entity_node_equal(expected, actual): | |||
"""Assert that two HTMLEntity nodes have the same data.""" | |||
assert expected.value == actual.value | |||
@@ -77,6 +92,7 @@ def _assert_html_entity_node_equal(expected, actual): | |||
assert expected.hexadecimal is actual.hexadecimal | |||
assert expected.hex_char == actual.hex_char | |||
def _assert_tag_node_equal(expected, actual): | |||
"""Assert that two Tag nodes have the same data.""" | |||
assert_wikicode_equal(expected.tag, actual.tag) | |||
@@ -105,6 +121,7 @@ def _assert_tag_node_equal(expected, actual): | |||
assert expected.padding == actual.padding | |||
assert_wikicode_equal(expected.closing_tag, actual.closing_tag) | |||
def _assert_template_node_equal(expected, actual): | |||
"""Assert that two Template nodes have the same data.""" | |||
assert_wikicode_equal(expected.name, actual.name) | |||
@@ -117,10 +134,12 @@ def _assert_template_node_equal(expected, actual): | |||
assert_wikicode_equal(exp_param.value, act_param.value) | |||
assert exp_param.showkey is act_param.showkey | |||
def _assert_text_node_equal(expected, actual): | |||
"""Assert that two Text nodes have the same data.""" | |||
assert expected.value == actual.value | |||
def _assert_wikilink_node_equal(expected, actual): | |||
"""Assert that two Wikilink nodes have the same data.""" | |||
assert_wikicode_equal(expected.title, actual.title) | |||
@@ -129,6 +148,7 @@ def _assert_wikilink_node_equal(expected, actual): | |||
else: | |||
assert actual.text is None | |||
def assert_wikicode_equal(expected, actual): | |||
"""Assert that two Wikicode objects have the same data.""" | |||
assert isinstance(actual, Wikicode) | |||
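A sketch of how these helpers compose inside a test (wrap, wraptext, and
assert_wikicode_equal as defined above; Text is already imported at the top of
this conftest):

    expected = wraptext("foo", "bar")          # Wikicode([Text("foo"), Text("bar")])
    actual = wrap([Text("foo"), Text("bar")])
    assert_wikicode_equal(expected, actual)    # same node types, same node data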
@@ -27,6 +27,7 @@ import pytest | |||
from mwparserfromhell.nodes import Argument, Text | |||
from .conftest import assert_wikicode_equal, wrap, wraptext | |||
def test_str(): | |||
"""test Argument.__str__()""" | |||
node = Argument(wraptext("foobar")) | |||
@@ -34,6 +35,7 @@ def test_str(): | |||
node2 = Argument(wraptext("foo"), wraptext("bar")) | |||
assert "{{{foo|bar}}}" == str(node2) | |||
def test_children(): | |||
"""test Argument.__children__()""" | |||
node1 = Argument(wraptext("foobar")) | |||
@@ -48,6 +50,7 @@ def test_children(): | |||
with pytest.raises(StopIteration): | |||
next(gen2) | |||
def test_strip(): | |||
"""test Argument.__strip__()""" | |||
node1 = Argument(wraptext("foobar")) | |||
@@ -55,6 +58,7 @@ def test_strip(): | |||
assert node1.__strip__() is None | |||
assert "bar" == node2.__strip__() | |||
def test_showtree(): | |||
"""test Argument.__showtree__()""" | |||
output = [] | |||
@@ -66,10 +70,19 @@ def test_showtree(): | |||
node1.__showtree__(output.append, get, mark) | |||
node2.__showtree__(output.append, get, mark) | |||
valid = [ | |||
"{{{", (getter, node1.name), "}}}", "{{{", (getter, node2.name), | |||
" | ", marker, (getter, node2.default), "}}}"] | |||
"{{{", | |||
(getter, node1.name), | |||
"}}}", | |||
"{{{", | |||
(getter, node2.name), | |||
" | ", | |||
marker, | |||
(getter, node2.default), | |||
"}}}", | |||
] | |||
assert valid == output | |||
def test_name(): | |||
"""test getter/setter for the name attribute""" | |||
name = wraptext("foobar") | |||
@@ -82,6 +95,7 @@ def test_name(): | |||
assert_wikicode_equal(wraptext("héhehé"), node1.name) | |||
assert_wikicode_equal(wraptext("héhehé"), node2.name) | |||
def test_default(): | |||
"""test getter/setter for the default attribute""" | |||
default = wraptext("baz") | |||
@@ -28,6 +28,7 @@ from mwparserfromhell.nodes import Template | |||
from mwparserfromhell.nodes.extras import Attribute | |||
from .conftest import assert_wikicode_equal, wrap, wraptext | |||
def test_str(): | |||
"""test Attribute.__str__()""" | |||
node = Attribute(wraptext("foo")) | |||
@@ -43,6 +44,7 @@ def test_str(): | |||
node6 = Attribute(wraptext("a"), wrap([]), None, " ", "", " ") | |||
assert " a= " == str(node6) | |||
def test_name(): | |||
"""test getter/setter for the name attribute""" | |||
name = wraptext("id") | |||
@@ -51,6 +53,7 @@ def test_name(): | |||
node.name = "{{id}}" | |||
assert_wikicode_equal(wrap([Template(wraptext("id"))]), node.name) | |||
def test_value(): | |||
"""test getter/setter for the value attribute""" | |||
value = wraptext("foo") | |||
@@ -74,6 +77,7 @@ def test_value(): | |||
assert_wikicode_equal(wraptext("fo\"o 'bar' b\"az"), node2.value) | |||
assert '"' == node2.quotes | |||
def test_quotes(): | |||
"""test getter/setter for the quotes attribute""" | |||
node1 = Attribute(wraptext("id"), wraptext("foo"), None) | |||
@@ -92,6 +96,7 @@ def test_quotes(): | |||
with pytest.raises(ValueError): | |||
Attribute(wraptext("id"), wraptext("foo bar baz"), None) | |||
def test_padding(): | |||
"""test getter/setter for the padding attributes""" | |||
for pad in ["pad_first", "pad_before_eq", "pad_after_eq"]: | |||
@@ -26,11 +26,13 @@ import pytest | |||
from mwparserfromhell.nodes import Comment | |||
def test_str(): | |||
"""test Comment.__str__()""" | |||
node = Comment("foobar") | |||
assert "<!--foobar-->" == str(node) | |||
def test_children(): | |||
"""test Comment.__children__()""" | |||
node = Comment("foobar") | |||
@@ -38,11 +40,13 @@ def test_children(): | |||
with pytest.raises(StopIteration): | |||
next(gen) | |||
def test_strip(): | |||
"""test Comment.__strip__()""" | |||
node = Comment("foobar") | |||
assert node.__strip__() is None | |||
def test_showtree(): | |||
"""test Comment.__showtree__()""" | |||
output = [] | |||
@@ -50,6 +54,7 @@ def test_showtree(): | |||
node.__showtree__(output.append, None, None) | |||
assert ["<!--foobar-->"] == output | |||
def test_contents(): | |||
"""test getter/setter for the contents attribute""" | |||
node = Comment("foobar") | |||
@@ -32,6 +32,7 @@ import pytest | |||
import mwparserfromhell | |||
def assert_print(value, output): | |||
"""Assertion check that *value*, when printed, produces *output*.""" | |||
buff = StringIO() | |||
@@ -39,6 +40,7 @@ def assert_print(value, output): | |||
buff.seek(0) | |||
assert output == buff.read() | |||
def test_readme_1(): | |||
"""test a block of example code in the README""" | |||
text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?" | |||
@@ -52,6 +54,7 @@ def test_readme_1(): | |||
assert_print(template.get(1).value, "bar") | |||
assert_print(template.get("eggs").value, "spam") | |||
def test_readme_2(): | |||
"""test a block of example code in the README""" | |||
text = "{{foo|{{bar}}={{baz|{{spam}}}}}}" | |||
@@ -59,17 +62,19 @@ def test_readme_2(): | |||
res = "['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}']" | |||
assert_print(temps, res) | |||
def test_readme_3(): | |||
"""test a block of example code in the README""" | |||
code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}") | |||
assert_print(code.filter_templates(recursive=False), | |||
"['{{foo|this {{includes a|template}}}}']") | |||
assert_print( | |||
code.filter_templates(recursive=False), | |||
"['{{foo|this {{includes a|template}}}}']", | |||
) | |||
foo = code.filter_templates(recursive=False)[0] | |||
assert_print(foo.get(1).value, "this {{includes a|template}}") | |||
assert_print(foo.get(1).value.filter_templates()[0], | |||
"{{includes a|template}}") | |||
assert_print(foo.get(1).value.filter_templates()[0].get(1).value, | |||
"template") | |||
assert_print(foo.get(1).value.filter_templates()[0], "{{includes a|template}}") | |||
assert_print(foo.get(1).value.filter_templates()[0].get(1).value, "template") | |||
def test_readme_4(): | |||
"""test a block of example code in the README""" | |||
@@ -90,6 +95,7 @@ def test_readme_4(): | |||
assert_print(text, res) | |||
assert text == code | |||
@pytest.mark.skipif("NOWEB" in os.environ, reason="web test disabled by environ var") | |||
def test_readme_5(): | |||
"""test a block of example code in the README; includes a web call""" | |||
@@ -27,6 +27,7 @@ import pytest | |||
from mwparserfromhell.nodes import ExternalLink, Text | |||
from .conftest import assert_wikicode_equal, wrap, wraptext | |||
def test_str(): | |||
"""test ExternalLink.__str__()""" | |||
node = ExternalLink(wraptext("http://example.com/"), brackets=False) | |||
@@ -35,15 +36,16 @@ def test_str(): | |||
assert "[http://example.com/]" == str(node2) | |||
node3 = ExternalLink(wraptext("http://example.com/"), wrap([])) | |||
assert "[http://example.com/ ]" == str(node3) | |||
node4 = ExternalLink(wraptext("http://example.com/"), | |||
wraptext("Example Web Page")) | |||
node4 = ExternalLink(wraptext("http://example.com/"), wraptext("Example Web Page")) | |||
assert "[http://example.com/ Example Web Page]" == str(node4) | |||
def test_children(): | |||
"""test ExternalLink.__children__()""" | |||
node1 = ExternalLink(wraptext("http://example.com/"), brackets=False) | |||
node2 = ExternalLink(wraptext("http://example.com/"), | |||
wrap([Text("Example"), Text("Page")])) | |||
node2 = ExternalLink( | |||
wraptext("http://example.com/"), wrap([Text("Example"), Text("Page")]) | |||
) | |||
gen1 = node1.__children__() | |||
gen2 = node2.__children__() | |||
assert node1.url == next(gen1) | |||
@@ -54,6 +56,7 @@ def test_children(): | |||
with pytest.raises(StopIteration): | |||
next(gen2) | |||
def test_strip(): | |||
"""test ExternalLink.__strip__()""" | |||
node1 = ExternalLink(wraptext("http://example.com"), brackets=False) | |||
@@ -66,6 +69,7 @@ def test_strip(): | |||
assert node3.__strip__() is None | |||
assert "Link" == node4.__strip__() | |||
def test_showtree(): | |||
"""test ExternalLink.__showtree__()""" | |||
output = [] | |||
@@ -76,11 +80,10 @@ def test_showtree(): | |||
node2 = ExternalLink(wraptext("http://example.com"), wraptext("Link")) | |||
node1.__showtree__(output.append, get, mark) | |||
node2.__showtree__(output.append, get, mark) | |||
valid = [ | |||
(getter, node1.url), "[", (getter, node2.url), | |||
(getter, node2.title), "]"] | |||
valid = [(getter, node1.url), "[", (getter, node2.url), (getter, node2.title), "]"] | |||
assert valid == output | |||
def test_url(): | |||
"""test getter/setter for the url attribute""" | |||
url = wraptext("http://example.com/") | |||
@@ -93,6 +96,7 @@ def test_url(): | |||
assert_wikicode_equal(wraptext("mailto:héhehé@spam.com"), node1.url) | |||
assert_wikicode_equal(wraptext("mailto:héhehé@spam.com"), node2.url) | |||
def test_title(): | |||
"""test getter/setter for the title attribute""" | |||
title = wraptext("Example!") | |||
@@ -105,6 +109,7 @@ def test_title(): | |||
node2.title = "My Website" | |||
assert_wikicode_equal(wraptext("My Website"), node2.title) | |||
def test_brackets(): | |||
"""test getter/setter for the brackets attribute""" | |||
node1 = ExternalLink(wraptext("http://example.com/"), brackets=False) | |||
@@ -27,6 +27,7 @@ import pytest | |||
from mwparserfromhell.nodes import Heading, Text | |||
from .conftest import assert_wikicode_equal, wrap, wraptext | |||
def test_str(): | |||
"""test Heading.__str__()""" | |||
node = Heading(wraptext("foobar"), 2) | |||
@@ -34,6 +35,7 @@ def test_str(): | |||
node2 = Heading(wraptext(" zzz "), 5) | |||
assert "===== zzz =====" == str(node2) | |||
def test_children(): | |||
"""test Heading.__children__()""" | |||
node = Heading(wrap([Text("foo"), Text("bar")]), 3) | |||
@@ -42,11 +44,13 @@ def test_children(): | |||
with pytest.raises(StopIteration): | |||
next(gen) | |||
def test_strip(): | |||
"""test Heading.__strip__()""" | |||
node = Heading(wraptext("foobar"), 3) | |||
assert "foobar" == node.__strip__() | |||
def test_showtree(): | |||
"""test Heading.__showtree__()""" | |||
output = [] | |||
@@ -56,10 +60,10 @@ def test_showtree(): | |||
node2 = Heading(wraptext(" baz "), 4) | |||
node1.__showtree__(output.append, get, None) | |||
node2.__showtree__(output.append, get, None) | |||
valid = ["===", (getter, node1.title), "===", | |||
"====", (getter, node2.title), "===="] | |||
valid = ["===", (getter, node1.title), "===", "====", (getter, node2.title), "===="] | |||
assert valid == output | |||
def test_title(): | |||
"""test getter/setter for the title attribute""" | |||
title = wraptext("foobar") | |||
@@ -68,6 +72,7 @@ def test_title(): | |||
node.title = "héhehé" | |||
assert_wikicode_equal(wraptext("héhehé"), node.title) | |||
def test_level(): | |||
"""test getter/setter for the level attribute""" | |||
node = Heading(wraptext("foobar"), 3) | |||
@@ -26,6 +26,7 @@ import pytest | |||
from mwparserfromhell.nodes import HTMLEntity | |||
def test_str(): | |||
"""test HTMLEntity.__str__()""" | |||
node1 = HTMLEntity("nbsp", named=True, hexadecimal=False) | |||
@@ -37,6 +38,7 @@ def test_str(): | |||
assert "k" == str(node3) | |||
assert "l" == str(node4) | |||
def test_children(): | |||
"""test HTMLEntity.__children__()""" | |||
node = HTMLEntity("nbsp", named=True, hexadecimal=False) | |||
@@ -44,6 +46,7 @@ def test_children(): | |||
with pytest.raises(StopIteration): | |||
next(gen) | |||
def test_strip(): | |||
"""test HTMLEntity.__strip__()""" | |||
node1 = HTMLEntity("nbsp", named=True, hexadecimal=False) | |||
@@ -57,6 +60,7 @@ def test_strip(): | |||
assert "é" == node3.__strip__(normalize=True) | |||
assert "é" == node3.__strip__(normalize=False) | |||
def test_showtree(): | |||
"""test HTMLEntity.__showtree__()""" | |||
output = [] | |||
@@ -69,6 +73,7 @@ def test_showtree(): | |||
res = [" ", "k", "é"] | |||
assert res == output | |||
def test_value(): | |||
"""test getter/setter for the value attribute""" | |||
node1 = HTMLEntity("nbsp") | |||
@@ -109,6 +114,7 @@ def test_value(): | |||
with pytest.raises(ValueError): | |||
node1.__setattr__("value", "12FFFF") | |||
def test_named(): | |||
"""test getter/setter for the named attribute""" | |||
node1 = HTMLEntity("nbsp") | |||
@@ -130,6 +136,7 @@ def test_named(): | |||
with pytest.raises(ValueError): | |||
node3.__setattr__("named", True) | |||
def test_hexadecimal(): | |||
"""test getter/setter for the hexadecimal attribute""" | |||
node1 = HTMLEntity("nbsp") | |||
@@ -147,6 +154,7 @@ def test_hexadecimal(): | |||
with pytest.raises(ValueError): | |||
node1.__setattr__("hexadecimal", True) | |||
def test_hex_char(): | |||
"""test getter/setter for the hex_char attribute""" | |||
node1 = HTMLEntity("e9") | |||
@@ -164,6 +172,7 @@ def test_hex_char(): | |||
with pytest.raises(ValueError): | |||
node1.__setattr__("hex_char", True) | |||
def test_normalize(): | |||
"""test getter/setter for the normalize attribute""" | |||
node1 = HTMLEntity("nbsp") | |||
@@ -27,6 +27,7 @@ import pytest | |||
from mwparserfromhell.nodes.extras import Parameter | |||
from .conftest import assert_wikicode_equal, wraptext | |||
def test_str(): | |||
"""test Parameter.__str__()""" | |||
node = Parameter(wraptext("1"), wraptext("foo"), showkey=False) | |||
@@ -34,6 +35,7 @@ def test_str(): | |||
node2 = Parameter(wraptext("foo"), wraptext("bar")) | |||
assert "foo=bar" == str(node2) | |||
def test_name(): | |||
"""test getter/setter for the name attribute""" | |||
name1 = wraptext("1") | |||
@@ -47,6 +49,7 @@ def test_name(): | |||
assert_wikicode_equal(wraptext("héhehé"), node1.name) | |||
assert_wikicode_equal(wraptext("héhehé"), node2.name) | |||
def test_value(): | |||
"""test getter/setter for the value attribute""" | |||
value = wraptext("bar") | |||
@@ -55,6 +58,7 @@ def test_value(): | |||
node.value = "héhehé" | |||
assert_wikicode_equal(wraptext("héhehé"), node.value) | |||
def test_showkey(): | |||
"""test getter/setter for the showkey attribute""" | |||
node1 = Parameter(wraptext("1"), wraptext("foo"), showkey=False) | |||
@@ -29,6 +29,7 @@ from mwparserfromhell.nodes import Tag, Template, Text, Wikilink | |||
from mwparserfromhell.nodes.extras import Parameter | |||
from .conftest import assert_wikicode_equal, wrap, wraptext | |||
@pytest.fixture() | |||
def pyparser(): | |||
"""make sure the correct tokenizer is used""" | |||
@@ -38,37 +39,60 @@ def pyparser(): | |||
yield | |||
parser.use_c = restore | |||
def test_use_c(pyparser): | |||
assert parser.Parser()._tokenizer.USES_C is False | |||
def test_parsing(pyparser): | |||
"""integration test for parsing overall""" | |||
text = "this is text; {{this|is=a|template={{with|[[links]]|in}}it}}" | |||
expected = wrap([ | |||
Text("this is text; "), | |||
Template(wraptext("this"), [ | |||
Parameter(wraptext("is"), wraptext("a")), | |||
Parameter(wraptext("template"), wrap([ | |||
Template(wraptext("with"), [ | |||
Parameter(wraptext("1"), | |||
wrap([Wikilink(wraptext("links"))]), | |||
showkey=False), | |||
Parameter(wraptext("2"), | |||
wraptext("in"), showkey=False) | |||
]), | |||
Text("it") | |||
])) | |||
]) | |||
]) | |||
expected = wrap( | |||
[ | |||
Text("this is text; "), | |||
Template( | |||
wraptext("this"), | |||
[ | |||
Parameter(wraptext("is"), wraptext("a")), | |||
Parameter( | |||
wraptext("template"), | |||
wrap( | |||
[ | |||
Template( | |||
wraptext("with"), | |||
[ | |||
Parameter( | |||
wraptext("1"), | |||
wrap([Wikilink(wraptext("links"))]), | |||
showkey=False, | |||
), | |||
Parameter( | |||
wraptext("2"), wraptext("in"), showkey=False | |||
), | |||
], | |||
), | |||
Text("it"), | |||
] | |||
), | |||
), | |||
], | |||
), | |||
] | |||
) | |||
actual = parser.Parser().parse(text) | |||
assert_wikicode_equal(expected, actual) | |||
def test_skip_style_tags(pyparser): | |||
"""test Parser.parse(skip_style_tags=True)""" | |||
text = "This is an example with ''italics''!" | |||
a = wrap([Text("This is an example with "), | |||
Tag(wraptext("i"), wraptext("italics"), wiki_markup="''"), | |||
Text("!")]) | |||
a = wrap( | |||
[ | |||
Text("This is an example with "), | |||
Tag(wraptext("i"), wraptext("italics"), wiki_markup="''"), | |||
Text("!"), | |||
] | |||
) | |||
b = wraptext("This is an example with ''italics''!") | |||
with_style = parser.Parser().parse(text, skip_style_tags=False) | |||
@@ -27,6 +27,7 @@ import pytest | |||
from mwparserfromhell.smart_list import SmartList | |||
from mwparserfromhell.smart_list.list_proxy import ListProxy | |||
def _test_get_set_del_item(builder): | |||
"""Run tests on __get/set/delitem__ of a list built with *builder*.""" | |||
list1 = builder([0, 1, 2, 3, "one", "two"]) | |||
@@ -104,6 +105,7 @@ def _test_get_set_del_item(builder): | |||
del list2[2:8:2] | |||
assert [0, 1, 3, 5, 7, 8, 9] == list2 | |||
def _test_add_radd_iadd(builder): | |||
"""Run tests on __r/i/add__ of a list built with *builder*.""" | |||
list1 = builder(range(5)) | |||
@@ -116,6 +118,7 @@ def _test_add_radd_iadd(builder): | |||
list1 += ["foo", "bar", "baz"] | |||
assert [0, 1, 2, 3, 4, "foo", "bar", "baz"] == list1 | |||
def _test_other_magic_methods(builder): | |||
"""Run tests on other magic methods of a list built with *builder*.""" | |||
list1 = builder([0, 1, 2, 3, "one", "two"]) | |||
@@ -200,6 +203,7 @@ def _test_other_magic_methods(builder): | |||
list4 *= 2 | |||
assert [0, 1, 2, 0, 1, 2] == list4 | |||
def _test_list_methods(builder): | |||
"""Run tests on the public methods of a list built with *builder*.""" | |||
list1 = builder(range(5)) | |||
@@ -263,6 +267,7 @@ def _test_list_methods(builder): | |||
list3.sort(key=lambda i: i[1], reverse=True) | |||
assert [("b", 8), ("a", 5), ("c", 3), ("d", 2)] == list3 | |||
def _dispatch_test_for_children(meth): | |||
"""Run a test method on various different types of children.""" | |||
meth(lambda L: SmartList(list(L))[:]) | |||
@@ -270,10 +275,20 @@ def _dispatch_test_for_children(meth): | |||
meth(lambda L: SmartList(list(L) + [999])[:-1]) | |||
meth(lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2]) | |||
def test_docs(): | |||
"""make sure the methods of SmartList/ListProxy have docstrings""" | |||
methods = ["append", "count", "extend", "index", "insert", "pop", | |||
"remove", "reverse", "sort"] | |||
methods = [ | |||
"append", | |||
"count", | |||
"extend", | |||
"index", | |||
"insert", | |||
"pop", | |||
"remove", | |||
"reverse", | |||
"sort", | |||
] | |||
for meth in methods: | |||
expected = getattr(list, meth).__doc__ | |||
smartlist_doc = getattr(SmartList, meth).__doc__ | |||
@@ -281,6 +296,7 @@ def test_docs(): | |||
assert expected == smartlist_doc | |||
assert expected == listproxy_doc | |||
def test_doctest(): | |||
"""make sure the test embedded in SmartList's docstring passes""" | |||
parent = SmartList([0, 1, 2, 3]) | |||
@@ -291,38 +307,47 @@ def test_doctest(): | |||
assert [2, 3, 4] == child | |||
assert [0, 1, 2, 3, 4] == parent | |||
def test_parent_get_set_del(): | |||
"""make sure SmartList's getitem/setitem/delitem work""" | |||
_test_get_set_del_item(SmartList) | |||
def test_parent_add(): | |||
"""make sure SmartList's add/radd/iadd work""" | |||
_test_add_radd_iadd(SmartList) | |||
def test_parent_other_magics(): | |||
"""make sure SmartList's other magically implemented features work""" | |||
_test_other_magic_methods(SmartList) | |||
def test_parent_methods(): | |||
"""make sure SmartList's non-magic methods work, like append()""" | |||
_test_list_methods(SmartList) | |||
def test_child_get_set_del(): | |||
"""make sure ListProxy's getitem/setitem/delitem work""" | |||
_dispatch_test_for_children(_test_get_set_del_item) | |||
def test_child_add(): | |||
"""make sure ListProxy's add/radd/iadd work""" | |||
_dispatch_test_for_children(_test_add_radd_iadd) | |||
def test_child_other_magics(): | |||
"""make sure ListProxy's other magically implemented features work""" | |||
_dispatch_test_for_children(_test_other_magic_methods) | |||
def test_child_methods(): | |||
"""make sure ListProxy's non-magic methods work, like append()""" | |||
_dispatch_test_for_children(_test_list_methods) | |||
def test_influence(): | |||
"""make sure changes are propagated from parents to children""" | |||
parent = SmartList([0, 1, 2, 3, 4, 5]) | |||
@@ -29,6 +29,7 @@ import pytest
from mwparserfromhell.string_mixin import StringMixIn

class _FakeString(StringMixIn):
    def __init__(self, data):
        self._data = data
@@ -36,22 +37,63 @@ class _FakeString(StringMixIn):
    def __str__(self):
        return self._data

@pytest.mark.parametrize('method', [
    "capitalize", "casefold", "center", "count", "encode", "endswith",
    "expandtabs", "find", "format", "format_map", "index", "isalnum",
    "isalpha", "isdecimal", "isdigit", "isidentifier", "islower",
    "isnumeric", "isprintable", "isspace", "istitle", "isupper",
    "join", "ljust", "lower", "lstrip", "maketrans", "partition",
    "replace", "rfind", "rindex", "rjust", "rpartition", "rsplit",
    "rstrip", "split", "splitlines", "startswith", "strip", "swapcase",
    "title", "translate", "upper", "zfill"
])
@pytest.mark.parametrize(
    "method",
    [
        "capitalize",
        "casefold",
        "center",
        "count",
        "encode",
        "endswith",
        "expandtabs",
        "find",
        "format",
        "format_map",
        "index",
        "isalnum",
        "isalpha",
        "isdecimal",
        "isdigit",
        "isidentifier",
        "islower",
        "isnumeric",
        "isprintable",
        "isspace",
        "istitle",
        "isupper",
        "join",
        "ljust",
        "lower",
        "lstrip",
        "maketrans",
        "partition",
        "replace",
        "rfind",
        "rindex",
        "rjust",
        "rpartition",
        "rsplit",
        "rstrip",
        "split",
        "splitlines",
        "startswith",
        "strip",
        "swapcase",
        "title",
        "translate",
        "upper",
        "zfill",
    ],
)
def test_docs(method):
    """make sure the various methods of StringMixIn have docstrings"""
    expected = getattr("foo", method).__doc__
    actual = getattr(_FakeString("foo"), method).__doc__
    assert expected == actual
def test_types():
    """make sure StringMixIns convert to different types correctly"""
    fstr = _FakeString("fake string")
@@ -63,6 +105,7 @@ def test_types():
    assert isinstance(bytes(fstr), bytes)
    assert isinstance(repr(fstr), str)

def test_comparisons():
    """make sure comparison operators work"""
    str1 = _FakeString("this is a fake string")
@@ -99,6 +142,7 @@ def test_comparisons():
    assert str5 < str1
    assert str5 <= str1

def test_other_magics():
    """test other magically implemented features, like len() and iter()"""
    str1 = _FakeString("fake string")
@@ -154,6 +198,7 @@ def test_other_magics():
    assert "real" not in str1
    assert "s" not in str2

def test_other_methods():
    """test the remaining non-magic methods of StringMixIn"""
    str1 = _FakeString("fake string")
@@ -354,8 +399,21 @@ def test_other_methods():
    actual = ["this", "is", "a", "sentence", "with", "whitespace"]
    assert actual == str25.rsplit()
    assert actual == str25.rsplit(None)
    actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with",
              "", "whitespace", ""]
    actual = [
        "",
        "",
        "",
        "this",
        "is",
        "a",
        "",
        "",
        "sentence",
        "with",
        "",
        "whitespace",
        "",
    ]
    assert actual == str25.rsplit(" ")
    actual = [" this is a", "sentence", "with", "whitespace"]
    assert actual == str25.rsplit(None, 3)
@@ -371,8 +429,21 @@ def test_other_methods():
    actual = ["this", "is", "a", "sentence", "with", "whitespace"]
    assert actual == str25.split()
    assert actual == str25.split(None)
    actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with",
              "", "whitespace", ""]
    actual = [
        "",
        "",
        "",
        "this",
        "is",
        "a",
        "",
        "",
        "sentence",
        "with",
        "",
        "whitespace",
        "",
    ]
    assert actual == str25.split(" ")
    actual = ["this", "is", "a", "sentence with whitespace "]
    assert actual == str25.split(None, 3)
@@ -382,10 +453,15 @@ def test_other_methods():
    assert actual == str25.split(maxsplit=3)
    str26 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere")
    assert ["lines", "of", "text", "are", "presented", "here"] \
        == str26.splitlines()
    assert ["lines\n", "of\n", "text\r\n", "are\r\n", "presented\n", "here"] \
        == str26.splitlines(True)
    assert ["lines", "of", "text", "are", "presented", "here"] == str26.splitlines()
    assert [
        "lines\n",
        "of\n",
        "text\r\n",
        "are\r\n",
        "presented\n",
        "here",
    ] == str26.splitlines(True)
    assert str1.startswith("fake") is True
    assert str1.startswith("faker") is False
@@ -398,8 +474,7 @@ def test_other_methods():
    assert "Fake String" == str1.title()
    table1 = StringMixIn.maketrans({97: "1", 101: "2", 105: "3",
                                    111: "4", 117: "5"})
    table1 = StringMixIn.maketrans({97: "1", 101: "2", 105: "3", 111: "4", 117: "5"})
    table2 = StringMixIn.maketrans("aeiou", "12345")
    table3 = StringMixIn.maketrans("aeiou", "12345", "rts")
    assert "f1k2 str3ng" == str1.translate(table1)
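
The point of all this delegation is that parsed objects behave like plain
strings. A quick sketch through the public parse() entry point:

import mwparserfromhell

code = mwparserfromhell.parse("Fake String")
assert code.lower() == "fake string"    # str methods are forwarded via StringMixIn
assert code.startswith("Fake") is True
assert "String" in code                 # __contains__ works too
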
@@ -34,21 +34,20 @@ agennq = lambda name, value: Attribute(wraptext(name), wraptext(value), None)
agenp = lambda name, v, a, b, c: Attribute(wraptext(name), v, '"', a, b, c)
agenpnv = lambda name, a, b, c: Attribute(wraptext(name), None, '"', a, b, c)

def test_str():
    """test Tag.__str__()"""
    node1 = Tag(wraptext("ref"))
    node2 = Tag(wraptext("span"), wraptext("foo"),
                [agen("style", "color: red;")])
    node3 = Tag(wraptext("ref"),
                attrs=[agennq("name", "foo"),
                       agenpnv("some_attr", " ", "", "")],
                self_closing=True)
    node2 = Tag(wraptext("span"), wraptext("foo"), [agen("style", "color: red;")])
    node3 = Tag(
        wraptext("ref"),
        attrs=[agennq("name", "foo"), agenpnv("some_attr", " ", "", "")],
        self_closing=True,
    )
    node4 = Tag(wraptext("br"), self_closing=True, padding=" ")
    node5 = Tag(wraptext("br"), self_closing=True, implicit=True)
    node6 = Tag(wraptext("br"), self_closing=True, invalid=True,
                implicit=True)
    node7 = Tag(wraptext("br"), self_closing=True, invalid=True,
                padding=" ")
    node6 = Tag(wraptext("br"), self_closing=True, invalid=True, implicit=True)
    node7 = Tag(wraptext("br"), self_closing=True, invalid=True, padding=" ")
    node8 = Tag(wraptext("hr"), wiki_markup="----", self_closing=True)
    node9 = Tag(wraptext("i"), wraptext("italics!"), wiki_markup="''")
@@ -62,6 +61,7 @@ def test_str():
    assert "----" == str(node8)
    assert "''italics!''" == str(node9)

def test_children():
    """test Tag.__children__()"""
    # <ref>foobar</ref>
@@ -69,10 +69,12 @@ def test_children():
    # '''bold text'''
    node2 = Tag(wraptext("b"), wraptext("bold text"), wiki_markup="'''")
    # <img id="foo" class="bar" selected />
    node3 = Tag(wraptext("img"),
                attrs=[agen("id", "foo"), agen("class", "bar"),
                       agennv("selected")],
                self_closing=True, padding=" ")
    node3 = Tag(
        wraptext("img"),
        attrs=[agen("id", "foo"), agen("class", "bar"), agennv("selected")],
        self_closing=True,
        padding=" ",
    )
    gen1 = node1.__children__()
    gen2 = node2.__children__()
@@ -94,6 +96,7 @@ def test_children():
    with pytest.raises(StopIteration):
        next(gen3)

def test_strip():
    """test Tag.__strip__()"""
    node1 = Tag(wraptext("i"), wraptext("foobar"))
@@ -104,28 +107,46 @@ def test_strip():
    assert node2.__strip__() is None
    assert node3.__strip__() is None

def test_showtree():
    """test Tag.__showtree__()"""
    output = []
    getter, marker = object(), object()
    get = lambda code: output.append((getter, code))
    mark = lambda: output.append(marker)
    node1 = Tag(wraptext("ref"), wraptext("text"),
                [agen("name", "foo"), agennv("selected")])
    node1 = Tag(
        wraptext("ref"), wraptext("text"), [agen("name", "foo"), agennv("selected")]
    )
    node2 = Tag(wraptext("br"), self_closing=True, padding=" ")
    node3 = Tag(wraptext("br"), self_closing=True, invalid=True,
                implicit=True, padding=" ")
    node3 = Tag(
        wraptext("br"), self_closing=True, invalid=True, implicit=True, padding=" "
    )
    node1.__showtree__(output.append, get, mark)
    node2.__showtree__(output.append, get, mark)
    node3.__showtree__(output.append, get, mark)
    valid = [
        "<", (getter, node1.tag), (getter, node1.attributes[0].name),
        " = ", marker, (getter, node1.attributes[0].value),
        (getter, node1.attributes[1].name), ">", (getter, node1.contents),
        "</", (getter, node1.closing_tag), ">", "<", (getter, node2.tag),
        "/>", "</", (getter, node3.tag), ">"]
        "<",
        (getter, node1.tag),
        (getter, node1.attributes[0].name),
        " = ",
        marker,
        (getter, node1.attributes[0].value),
        (getter, node1.attributes[1].name),
        ">",
        (getter, node1.contents),
        "</",
        (getter, node1.closing_tag),
        ">",
        "<",
        (getter, node2.tag),
        "/>",
        "</",
        (getter, node3.tag),
        ">",
    ]
    assert valid == output
def test_tag():
    """test getter/setter for the tag attribute"""
    tag = wraptext("ref")
@@ -137,6 +158,7 @@ def test_tag():
    assert_wikicode_equal(wraptext("span"), node.closing_tag)
    assert "<span>text</span>" == node

def test_contents():
    """test getter/setter for the contents attribute"""
    contents = wraptext("text")
@@ -147,6 +169,7 @@ def test_contents():
    assert_wikicode_equal(parsed, node.contents)
    assert "<ref>text and a {{template}}</ref>" == node

def test_attributes():
    """test getter for the attributes attribute"""
    attrs = [agen("name", "bar")]
@@ -155,6 +178,7 @@ def test_attributes():
    assert [] == node1.attributes
    assert attrs is node2.attributes

def test_wiki_markup():
    """test getter/setter for the wiki_markup attribute"""
    node = Tag(wraptext("i"), wraptext("italic text"))
@@ -166,6 +190,7 @@ def test_wiki_markup():
    assert node.wiki_markup is None
    assert "<i>italic text</i>" == node

def test_self_closing():
    """test getter/setter for the self_closing attribute"""
    node = Tag(wraptext("ref"), wraptext("foobar"))
@@ -177,6 +202,7 @@ def test_self_closing():
    assert node.self_closing is False
    assert "<ref>foobar</ref>" == node

def test_invalid():
    """test getter/setter for the invalid attribute"""
    node = Tag(wraptext("br"), self_closing=True, implicit=True)
@@ -188,6 +214,7 @@ def test_invalid():
    assert node.invalid is False
    assert "<br>" == node

def test_implicit():
    """test getter/setter for the implicit attribute"""
    node = Tag(wraptext("br"), self_closing=True)
@@ -199,6 +226,7 @@ def test_implicit():
    assert node.implicit is False
    assert "<br/>" == node

def test_padding():
    """test getter/setter for the padding attribute"""
    node = Tag(wraptext("ref"), wraptext("foobar"))
@@ -212,6 +240,7 @@ def test_padding():
    with pytest.raises(ValueError):
        node.__setattr__("padding", True)

def test_closing_tag():
    """test getter/setter for the closing_tag attribute"""
    tag = wraptext("ref")
@@ -222,6 +251,7 @@ def test_closing_tag():
    assert_wikicode_equal(parsed, node.closing_tag)
    assert "<ref>foobar</ref {{ignore me}}>" == node

def test_wiki_style_separator():
    """test getter/setter for wiki_style_separator attribute"""
    node = Tag(wraptext("table"), wraptext("\n"))
@@ -233,6 +263,7 @@ def test_wiki_style_separator():
    node2 = Tag(wraptext("table"), wraptext("\n"), wiki_style_separator="|")
    assert "|" == node2.wiki_style_separator

def test_closing_wiki_markup():
    """test getter/setter for closing_wiki_markup attribute"""
    node = Tag(wraptext("table"), wraptext("\n"))
@@ -248,12 +279,17 @@ def test_closing_wiki_markup():
    node.wiki_markup = False
    assert node.closing_wiki_markup is None
    assert "<table>\n</table>" == node
    node2 = Tag(wraptext("table"), wraptext("\n"),
                attrs=[agen("id", "foo")], wiki_markup="{|",
                closing_wiki_markup="|}")
    node2 = Tag(
        wraptext("table"),
        wraptext("\n"),
        attrs=[agen("id", "foo")],
        wiki_markup="{|",
        closing_wiki_markup="|}",
    )
    assert "|}" == node2.closing_wiki_markup
    assert '{| id="foo"\n|}' == node2
def test_has():
    """test Tag.has()"""
    node = Tag(wraptext("ref"), wraptext("cite"), [agen("name", "foo")])
@@ -263,19 +299,26 @@ def test_has():
    assert node.has("Name") is False
    assert node.has("foo") is False
    attrs = [agen("id", "foo"), agenp("class", "bar", " ", "\n", "\n"),
             agen("foo", "bar"), agenpnv("foo", " ", " \n ", " \t")]
    attrs = [
        agen("id", "foo"),
        agenp("class", "bar", " ", "\n", "\n"),
        agen("foo", "bar"),
        agenpnv("foo", " ", " \n ", " \t"),
    ]
    node2 = Tag(wraptext("div"), attrs=attrs, self_closing=True)
    assert node2.has("id") is True
    assert node2.has("class") is True
    assert node2.has(attrs[1].pad_first + str(attrs[1].name) +
                     attrs[1].pad_before_eq) is True
    assert (
        node2.has(attrs[1].pad_first + str(attrs[1].name) + attrs[1].pad_before_eq)
        is True
    )
    assert node2.has(attrs[3]) is True
    assert node2.has(str(attrs[3])) is True
    assert node2.has("idclass") is False
    assert node2.has("id class") is False
    assert node2.has("id=foo") is False

def test_get():
    """test Tag.get()"""
    attrs = [agen("name", "foo")]
@@ -288,13 +331,18 @@ def test_get():
    with pytest.raises(ValueError):
        node.get("foo")
    attrs = [agen("id", "foo"), agenp("class", "bar", " ", "\n", "\n"),
             agen("foo", "bar"), agenpnv("foo", " ", " \n ", " \t")]
    attrs = [
        agen("id", "foo"),
        agenp("class", "bar", " ", "\n", "\n"),
        agen("foo", "bar"),
        agenpnv("foo", " ", " \n ", " \t"),
    ]
    node2 = Tag(wraptext("div"), attrs=attrs, self_closing=True)
    assert attrs[0] is node2.get("id")
    assert attrs[1] is node2.get("class")
    assert attrs[1] is node2.get(
        attrs[1].pad_first + str(attrs[1].name) + attrs[1].pad_before_eq)
        attrs[1].pad_first + str(attrs[1].name) + attrs[1].pad_before_eq
    )
    assert attrs[3] is node2.get(attrs[3])
    assert attrs[3] is node2.get(str(attrs[3]))
    assert attrs[3] is node2.get(" foo")
@@ -305,6 +353,7 @@ def test_get():
    with pytest.raises(ValueError):
        node2.get("id=foo")

def test_add():
    """test Tag.add()"""
    node = Tag(wraptext("ref"), wraptext("cite"))
@@ -330,19 +379,29 @@ def test_add():
    assert attr6 == node.attributes[5]
    assert attr7 == node.attributes[6]
    assert attr7 == node.get("name")
    assert_wikicode_equal(wrap([Template(wraptext("foobar"))]),
                          node.attributes[5].value)
    assert "".join(("<ref", attr1, attr2, attr3, attr4, attr5,
                    attr6, attr7, ">cite</ref>")) == node
    assert_wikicode_equal(
        wrap([Template(wraptext("foobar"))]), node.attributes[5].value
    )
    assert (
        "".join(
            ("<ref", attr1, attr2, attr3, attr4, attr5, attr6, attr7, ">cite</ref>")
        )
        == node
    )
    with pytest.raises(ValueError):
        node.add("name", "foo", quotes="bar")
    with pytest.raises(ValueError):
        node.add("name", "a bc d", quotes=None)

def test_remove():
    """test Tag.remove()"""
    attrs = [agen("id", "foo"), agenp("class", "bar", " ", "\n", "\n"),
             agen("foo", "bar"), agenpnv("foo", " ", " \n ", " \t")]
    attrs = [
        agen("id", "foo"),
        agenp("class", "bar", " ", "\n", "\n"),
        agen("foo", "bar"),
        agenpnv("foo", " ", " \n ", " \t"),
    ]
    node = Tag(wraptext("div"), attrs=attrs, self_closing=True)
    node.remove("class")
    assert '<div id="foo" foo="bar" foo \n />' == node
@@ -351,4 +410,4 @@ def test_remove():
    with pytest.raises(ValueError):
        node.remove("foo")
    node.remove("id")
    assert '<div/>' == node
    assert "<div/>" == node
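
A sketch of the same Tag accessors driven through the parser rather than
hand-built nodes; the tag and attribute names are illustrative:

import mwparserfromhell

code = mwparserfromhell.parse('<span style="color: red;">foo</span>')
tag = code.filter_tags()[0]
assert str(tag.tag) == "span"
assert str(tag.contents) == "foo"
assert str(tag.get("style").value) == "color: red;"
tag.add("id", "bar")   # appended with the default '"' quoting
tag.remove("style")
assert str(code) == '<span id="bar">foo</span>'
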
@@ -34,19 +34,19 @@ from .conftest import assert_wikicode_equal, wrap, wraptext
pgens = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=True)
pgenh = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=False)

def test_str():
    """test Template.__str__()"""
    node = Template(wraptext("foobar"))
    assert "{{foobar}}" == str(node)
    node2 = Template(wraptext("foo"),
                     [pgenh("1", "bar"), pgens("abc", "def")])
    node2 = Template(wraptext("foo"), [pgenh("1", "bar"), pgens("abc", "def")])
    assert "{{foo|bar|abc=def}}" == str(node2)

def test_children():
    """test Template.__children__()"""
    node2p1 = Parameter(wraptext("1"), wraptext("bar"), showkey=False)
    node2p2 = Parameter(wraptext("abc"), wrap([Text("def"), Text("ghi")]),
                        showkey=True)
    node2p2 = Parameter(wraptext("abc"), wrap([Text("def"), Text("ghi")]), showkey=True)
    node1 = Template(wraptext("foobar"))
    node2 = Template(wraptext("foo"), [node2p1, node2p2])
@@ -62,16 +62,23 @@ def test_children():
    with pytest.raises(StopIteration):
        next(gen2)

def test_strip():
    """test Template.__strip__()"""
    node1 = Template(wraptext("foobar"))
    node2 = Template(wraptext("foo"), [
        pgenh("1", "bar"), pgens("foo", ""), pgens("abc", "def")])
    node3 = Template(wraptext("foo"), [
        pgenh("1", "foo"),
        Parameter(wraptext("2"), wrap([Template(wraptext("hello"))]),
                  showkey=False),
        pgenh("3", "bar")])
    node2 = Template(
        wraptext("foo"), [pgenh("1", "bar"), pgens("foo", ""), pgens("abc", "def")]
    )
    node3 = Template(
        wraptext("foo"),
        [
            pgenh("1", "foo"),
            Parameter(
                wraptext("2"), wrap([Template(wraptext("hello"))]), showkey=False
            ),
            pgenh("3", "bar"),
        ],
    )
    assert node1.__strip__(keep_template_params=False) is None
    assert node2.__strip__(keep_template_params=False) is None
@@ -79,6 +86,7 @@ def test_strip():
    assert "bar def" == node2.__strip__(keep_template_params=True)
    assert "foo bar" == node3.__strip__(keep_template_params=True)

def test_showtree():
    """test Template.__showtree__()"""
    output = []
@@ -86,18 +94,32 @@ def test_showtree():
    get = lambda code: output.append((getter, code))
    mark = lambda: output.append(marker)
    node1 = Template(wraptext("foobar"))
    node2 = Template(wraptext("foo"),
                     [pgenh("1", "bar"), pgens("abc", "def")])
    node2 = Template(wraptext("foo"), [pgenh("1", "bar"), pgens("abc", "def")])
    node1.__showtree__(output.append, get, mark)
    node2.__showtree__(output.append, get, mark)
    valid = [
        "{{", (getter, node1.name), "}}", "{{", (getter, node2.name),
        " | ", marker, (getter, node2.params[0].name), " = ", marker,
        (getter, node2.params[0].value), " | ", marker,
        (getter, node2.params[1].name), " = ", marker,
        (getter, node2.params[1].value), "}}"]
        "{{",
        (getter, node1.name),
        "}}",
        "{{",
        (getter, node2.name),
        " | ",
        marker,
        (getter, node2.params[0].name),
        " = ",
        marker,
        (getter, node2.params[0].value),
        " | ",
        marker,
        (getter, node2.params[1].name),
        " = ",
        marker,
        (getter, node2.params[1].value),
        "}}",
    ]
    assert valid == output
def test_name():
    """test getter/setter for the name attribute"""
    name = wraptext("foobar")
@@ -110,6 +132,7 @@ def test_name():
    assert_wikicode_equal(wraptext("asdf"), node1.name)
    assert_wikicode_equal(wraptext("téstïng"), node2.name)

def test_params():
    """test getter for the params attribute"""
    node1 = Template(wraptext("foobar"))
@@ -118,13 +141,14 @@ def test_params():
    assert [] == node1.params
    assert plist is node2.params

def test_has():
    """test Template.has()"""
    node1 = Template(wraptext("foobar"))
    node2 = Template(wraptext("foo"),
                     [pgenh("1", "bar"), pgens("\nabc ", "def")])
    node3 = Template(wraptext("foo"),
                     [pgenh("1", "a"), pgens("b", "c"), pgens("1", "d")])
    node2 = Template(wraptext("foo"), [pgenh("1", "bar"), pgens("\nabc ", "def")])
    node3 = Template(
        wraptext("foo"), [pgenh("1", "a"), pgens("b", "c"), pgens("1", "d")]
    )
    node4 = Template(wraptext("foo"), [pgenh("1", "a"), pgens("b", " ")])
    assert node1.has("foobar", False) is False
    assert node2.has(1, False) is True
@@ -138,6 +162,7 @@ def test_has():
    assert node1.has_param("foobar", False) is False
    assert node2.has_param(1, False) is True

def test_get():
    """test Template.get()"""
    node1 = Template(wraptext("foobar"))
@@ -159,16 +184,15 @@ def test_get():
    assert node3p2 is node3.get("1")
    assert node4p1 is node4.get("b ")

def test_add():
    """test Template.add()"""
    node1 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")])
    node2 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")])
    node3 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")])
    node4 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")])
    node5 = Template(wraptext("a"), [pgens("b", "c"),
                                     pgens(" d ", "e")])
    node6 = Template(wraptext("a"), [pgens("b", "c"), pgens("b", "d"),
                                     pgens("b", "e")])
    node5 = Template(wraptext("a"), [pgens("b", "c"), pgens(" d ", "e")])
    node6 = Template(wraptext("a"), [pgens("b", "c"), pgens("b", "d"), pgens("b", "e")])
    node7 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")])
    node8p = pgenh("1", "d")
    node8 = Template(wraptext("a"), [pgens("b", "c"), node8p])
@@ -176,48 +200,87 @@ def test_add():
    node10 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "e")])
    node11 = Template(wraptext("a"), [pgens("b", "c")])
    node12 = Template(wraptext("a"), [pgens("b", "c")])
    node13 = Template(wraptext("a"), [
        pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")])
    node14 = Template(wraptext("a\n"), [
        pgens("b ", "c\n"), pgens("d ", " e"), pgens("f ", "g\n"),
        pgens("h ", " i\n")])
    node15 = Template(wraptext("a"), [
        pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")])
    node16 = Template(wraptext("a"), [
        pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")])
    node13 = Template(
        wraptext("a"), [pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]
    )
    node14 = Template(
        wraptext("a\n"),
        [
            pgens("b ", "c\n"),
            pgens("d ", " e"),
            pgens("f ", "g\n"),
            pgens("h ", " i\n"),
        ],
    )
    node15 = Template(
        wraptext("a"),
        [pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")],
    )
    node16 = Template(
        wraptext("a"), [pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]
    )
    node17 = Template(wraptext("a"), [pgenh("1", "b")])
    node18 = Template(wraptext("a"), [pgenh("1", "b")])
    node19 = Template(wraptext("a"), [pgenh("1", "b")])
    node20 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"),
                                      pgenh("3", "d"), pgenh("4", "e")])
    node21 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"),
                                      pgens("4", "d"), pgens("5", "e")])
    node22 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"),
                                      pgens("4", "d"), pgens("5", "e")])
    node20 = Template(
        wraptext("a"),
        [pgenh("1", "b"), pgenh("2", "c"), pgenh("3", "d"), pgenh("4", "e")],
    )
    node21 = Template(
        wraptext("a"),
        [pgenh("1", "b"), pgenh("2", "c"), pgens("4", "d"), pgens("5", "e")],
    )
    node22 = Template(
        wraptext("a"),
        [pgenh("1", "b"), pgenh("2", "c"), pgens("4", "d"), pgens("5", "e")],
    )
    node23 = Template(wraptext("a"), [pgenh("1", "b")])
    node24 = Template(wraptext("a"), [pgenh("1", "b")])
    node25 = Template(wraptext("a"), [pgens("b", "c")])
    node26 = Template(wraptext("a"), [pgenh("1", "b")])
    node27 = Template(wraptext("a"), [pgenh("1", "b")])
    node28 = Template(wraptext("a"), [pgens("1", "b")])
    node29 = Template(wraptext("a"), [
        pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")])
    node30 = Template(wraptext("a\n"), [
        pgens("b ", "c\n"), pgens("d ", " e"), pgens("f ", "g\n"),
        pgens("h ", " i\n")])
    node31 = Template(wraptext("a"), [
        pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")])
    node32 = Template(wraptext("a"), [
        pgens("\nb ", " c "), pgens("\nd ", " e "), pgens("\nf ", " g ")])
    node33 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"),
                                      pgens("b", "f"), pgens("b", "h"),
                                      pgens("i", "j")])
    node34 = Template(wraptext("a"), [pgens("1", "b"), pgens("x", "y"),
                                      pgens("1", "c"), pgens("2", "d")])
    node35 = Template(wraptext("a"), [pgens("1", "b"), pgens("x", "y"),
                                      pgenh("1", "c"), pgenh("2", "d")])
    node36 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"),
                                      pgens("f", "g")])
    node29 = Template(
        wraptext("a"), [pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]
    )
    node30 = Template(
        wraptext("a\n"),
        [
            pgens("b ", "c\n"),
            pgens("d ", " e"),
            pgens("f ", "g\n"),
            pgens("h ", " i\n"),
        ],
    )
    node31 = Template(
        wraptext("a"),
        [pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")],
    )
    node32 = Template(
        wraptext("a"),
        [pgens("\nb ", " c "), pgens("\nd ", " e "), pgens("\nf ", " g ")],
    )
    node33 = Template(
        wraptext("a"),
        [
            pgens("b", "c"),
            pgens("d", "e"),
            pgens("b", "f"),
            pgens("b", "h"),
            pgens("i", "j"),
        ],
    )
    node34 = Template(
        wraptext("a"),
        [pgens("1", "b"), pgens("x", "y"), pgens("1", "c"), pgens("2", "d")],
    )
    node35 = Template(
        wraptext("a"),
        [pgens("1", "b"), pgens("x", "y"), pgenh("1", "c"), pgenh("2", "d")],
    )
    node36 = Template(
        wraptext("a"), [pgens("b", "c"), pgens("d", "e"), pgens("f", "g")]
    )
    node37 = Template(wraptext("a"), [pgenh("1", "")])
    node38 = Template(wraptext("abc"))
    node39 = Template(wraptext("a"), [pgenh("1", " b ")])
@@ -320,65 +383,121 @@ def test_add():
    assert "{{a|1= b|2= c|3= d}}" == node41
    assert "{{a|b=hello \n}}" == node42
def test_remove():
    """test Template.remove()"""
    node1 = Template(wraptext("foobar"))
    node2 = Template(wraptext("foo"),
                     [pgenh("1", "bar"), pgens("abc", "def")])
    node3 = Template(wraptext("foo"),
                     [pgenh("1", "bar"), pgens("abc", "def")])
    node4 = Template(wraptext("foo"),
                     [pgenh("1", "bar"), pgenh("2", "baz")])
    node5 = Template(wraptext("foo"), [
        pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")])
    node6 = Template(wraptext("foo"), [
        pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")])
    node7 = Template(wraptext("foo"), [
        pgens("1 ", "a"), pgens(" 1", "b"), pgens("2", "c")])
    node8 = Template(wraptext("foo"), [
        pgens("1 ", "a"), pgens(" 1", "b"), pgens("2", "c")])
    node9 = Template(wraptext("foo"), [
        pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")])
    node10 = Template(wraptext("foo"), [
        pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")])
    node11 = Template(wraptext("foo"), [
        pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")])
    node12 = Template(wraptext("foo"), [
        pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")])
    node13 = Template(wraptext("foo"), [
        pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")])
    node14 = Template(wraptext("foo"), [
        pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")])
    node15 = Template(wraptext("foo"), [
        pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")])
    node16 = Template(wraptext("foo"), [
        pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")])
    node17 = Template(wraptext("foo"), [
        pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")])
    node18 = Template(wraptext("foo"), [
        pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")])
    node19 = Template(wraptext("foo"), [
        pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")])
    node20 = Template(wraptext("foo"), [
        pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")])
    node21 = Template(wraptext("foo"), [
        pgens("a", "b"), pgens("c", "d"), pgens("e", "f"), pgens("a", "b"),
        pgens("a", "b")])
    node22 = Template(wraptext("foo"), [
        pgens("a", "b"), pgens("c", "d"), pgens("e", "f"), pgens("a", "b"),
        pgens("a", "b")])
    node23 = Template(wraptext("foo"), [
        pgens("a", "b"), pgens("c", "d"), pgens("e", "f"), pgens("a", "b"),
        pgens("a", "b")])
    node24 = Template(wraptext("foo"), [
        pgens("a", "b"), pgens("c", "d"), pgens("e", "f"), pgens("a", "b"),
        pgens("a", "b")])
    node25 = Template(wraptext("foo"), [
        pgens("a", "b"), pgens("c", "d"), pgens("e", "f"), pgens("a", "b"),
        pgens("a", "b")])
    node26 = Template(wraptext("foo"), [
        pgens("a", "b"), pgens("c", "d"), pgens("e", "f"), pgens("a", "b"),
        pgens("a", "b")])
    node2 = Template(wraptext("foo"), [pgenh("1", "bar"), pgens("abc", "def")])
    node3 = Template(wraptext("foo"), [pgenh("1", "bar"), pgens("abc", "def")])
    node4 = Template(wraptext("foo"), [pgenh("1", "bar"), pgenh("2", "baz")])
    node5 = Template(
        wraptext("foo"), [pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]
    )
    node6 = Template(
        wraptext("foo"), [pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]
    )
    node7 = Template(
        wraptext("foo"), [pgens("1 ", "a"), pgens(" 1", "b"), pgens("2", "c")]
    )
    node8 = Template(
        wraptext("foo"), [pgens("1 ", "a"), pgens(" 1", "b"), pgens("2", "c")]
    )
    node9 = Template(
        wraptext("foo"), [pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")]
    )
    node10 = Template(
        wraptext("foo"), [pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")]
    )
    node11 = Template(
        wraptext("foo"), [pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]
    )
    node12 = Template(
        wraptext("foo"), [pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]
    )
    node13 = Template(
        wraptext("foo"), [pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]
    )
    node14 = Template(
        wraptext("foo"), [pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]
    )
    node15 = Template(
        wraptext("foo"), [pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]
    )
    node16 = Template(
        wraptext("foo"), [pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]
    )
    node17 = Template(
        wraptext("foo"), [pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")]
    )
    node18 = Template(
        wraptext("foo"), [pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")]
    )
    node19 = Template(
        wraptext("foo"), [pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")]
    )
    node20 = Template(
        wraptext("foo"), [pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")]
    )
    node21 = Template(
        wraptext("foo"),
        [
            pgens("a", "b"),
            pgens("c", "d"),
            pgens("e", "f"),
            pgens("a", "b"),
            pgens("a", "b"),
        ],
    )
    node22 = Template(
        wraptext("foo"),
        [
            pgens("a", "b"),
            pgens("c", "d"),
            pgens("e", "f"),
            pgens("a", "b"),
            pgens("a", "b"),
        ],
    )
    node23 = Template(
        wraptext("foo"),
        [
            pgens("a", "b"),
            pgens("c", "d"),
            pgens("e", "f"),
            pgens("a", "b"),
            pgens("a", "b"),
        ],
    )
    node24 = Template(
        wraptext("foo"),
        [
            pgens("a", "b"),
            pgens("c", "d"),
            pgens("e", "f"),
            pgens("a", "b"),
            pgens("a", "b"),
        ],
    )
    node25 = Template(
        wraptext("foo"),
        [
            pgens("a", "b"),
            pgens("c", "d"),
            pgens("e", "f"),
            pgens("a", "b"),
            pgens("a", "b"),
        ],
    )
    node26 = Template(
        wraptext("foo"),
        [
            pgens("a", "b"),
            pgens("c", "d"),
            pgens("e", "f"),
            pgens("a", "b"),
            pgens("a", "b"),
        ],
    )
    node27 = Template(wraptext("foo"), [pgenh("1", "bar")])
    node28 = Template(wraptext("foo"), [pgenh("1", "bar")])
@@ -444,12 +563,14 @@ def test_remove():
    with pytest.raises(ValueError):
        node27.remove(node28.get(1))
def test_formatting():
    """test realistic param manipulation with complex whitespace formatting
    (assumes that parsing works correctly)"""
    tests = [
        # https://en.wikipedia.org/w/index.php?title=Lamar_County,_Georgia&oldid=792356004
        ("""{{Infobox U.S. county
        # https://en.wikipedia.org/w/index.php?title=Lamar_County,_Georgia&oldid=792356004
        (
            """{{Infobox U.S. county
| county = Lamar County
| state = Georgia
| seal =
@@ -471,16 +592,17 @@ def test_formatting():
| district = 3rd
| named for = [[Lucius Quintus Cincinnatus Lamar II]]
}}""",
         """@@ -11,4 +11,4 @@
            """@@ -11,4 +11,4 @@
| area percentage = 1.3%
-| census yr = 2010
-| pop = 18317
+| census estimate yr = 2016
+| pop = 12345<ref>example ref</ref>
| density_sq_mi = 100"""),
        # https://en.wikipedia.org/w/index.php?title=Rockdale_County,_Georgia&oldid=792359760
        ("""{{Infobox U.S. County|
| density_sq_mi = 100""",
        ),
        # https://en.wikipedia.org/w/index.php?title=Rockdale_County,_Georgia&oldid=792359760
        (
            """{{Infobox U.S. County|
county = Rockdale County |
state = Georgia |
seal = |
@@ -500,16 +622,17 @@ def test_formatting():
| district = 4th
| time zone= Eastern
}}""",
         """@@ -11,4 +11,4 @@
            """@@ -11,4 +11,4 @@
area percentage = 1.7% |
- census yr = 2010|
- pop = 85215 |
+ census estimate yr = 2016 |
+ pop = 12345<ref>example ref</ref> |
density_sq_mi = 657 |"""),
        # https://en.wikipedia.org/w/index.php?title=Spalding_County,_Georgia&oldid=792360413
        ("""{{Infobox U.S. County|
density_sq_mi = 657 |""",
        ),
        # https://en.wikipedia.org/w/index.php?title=Spalding_County,_Georgia&oldid=792360413
        (
            """{{Infobox U.S. County|
| county = Spalding County |
| state = Georgia |
| seal = |
@@ -530,16 +653,17 @@ def test_formatting():
| district = 3rd
| time zone = Eastern
}}""",
         """@@ -11,4 +11,4 @@
            """@@ -11,4 +11,4 @@
| area percentage = 1.6% |
-| census yr = 2010|
-| pop = 64073 |
+|
+| census estimate yr = 2016 | pop = 12345<ref>example ref</ref> |
| density_sq_mi = 326 |"""),
        # https://en.wikipedia.org/w/index.php?title=Clinton_County,_Illinois&oldid=794694648
        ("""{{Infobox U.S. county
| density_sq_mi = 326 |""",
        ),
        # https://en.wikipedia.org/w/index.php?title=Clinton_County,_Illinois&oldid=794694648
        (
            """{{Infobox U.S. county
|county = Clinton County
|state = Illinois
| ex image = File:Clinton County Courthouse, Carlyle.jpg
@@ -560,16 +684,17 @@ def test_formatting():
|web = www.clintonco.illinois.gov
| district = 15th
}}""",
         """@@ -15,4 +15,4 @@
            """@@ -15,4 +15,4 @@
|area percentage = 5.8%
- |census yr = 2010
- |pop = 37762
+ |census estimate yr = 2016
+ |pop = 12345<ref>example ref</ref>
|density_sq_mi = 80"""),
        # https://en.wikipedia.org/w/index.php?title=Winnebago_County,_Illinois&oldid=789193800
        ("""{{Infobox U.S. county |
|density_sq_mi = 80""",
        ),
        # https://en.wikipedia.org/w/index.php?title=Winnebago_County,_Illinois&oldid=789193800
        (
            """{{Infobox U.S. county |
county = Winnebago County |
state = Illinois |
seal = Winnebago County il seal.png |
@@ -590,19 +715,21 @@ def test_formatting():
| district = 16th
| district2 = 17th
}}""",
         """@@ -11,4 +11,4 @@
            """@@ -11,4 +11,4 @@
area percentage = 1.1% |
- census yr = 2010|
- pop = 295266 |
+ census estimate yr = 2016|
+ pop = 12345<ref>example ref</ref> |
density_sq_mi = 575""")]
density_sq_mi = 575""",
        ),
    ]
    for (original, expected) in tests:
        code = parse(original)
        template = code.filter_templates()[0]
        template.add("pop", "12345<ref>example ref</ref>")
        template.add('census estimate yr', "2016", before="pop")
        template.add("census estimate yr", "2016", before="pop")
        template.remove("census yr")
        oldlines = original.splitlines(True)
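
The loop above is the same public workflow user code follows when editing an
infobox. A minimal sketch with illustrative parameter names:

import mwparserfromhell

code = mwparserfromhell.parse("{{Infobox|census yr=2010|pop=18317}}")
template = code.filter_templates()[0]
template.add("census estimate yr", "2016", before="pop")  # insert before "pop"
template.add("pop", "12345")    # an existing parameter is updated in place
template.remove("census yr")
assert "census yr" not in str(code)
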
@@ -26,6 +26,7 @@ import pytest
from mwparserfromhell.nodes import Text

def test_str():
    """test Text.__str__()"""
    node = Text("foobar")
@@ -33,6 +34,7 @@ def test_str():
    node2 = Text("fóóbar")
    assert "fóóbar" == str(node2)

def test_children():
    """test Text.__children__()"""
    node = Text("foobar")
@@ -40,11 +42,13 @@ def test_children():
    with pytest.raises(StopIteration):
        next(gen)

def test_strip():
    """test Text.__strip__()"""
    node = Text("foobar")
    assert node is node.__strip__()

def test_showtree():
    """test Text.__showtree__()"""
    output = []
@@ -57,6 +61,7 @@ def test_showtree():
    res = ["foobar", r"f\xf3\xf3bar", "\\U00010332\\U0001033f\\U00010344"]
    assert res == output

def test_value():
    """test getter/setter for the value attribute"""
    node = Text("foobar")
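
Text is the simplest node type; as test_value checks, it wraps a single
mutable string:

from mwparserfromhell.nodes import Text

node = Text("foobar")
assert str(node) == "foobar"
node.value = "spam"   # plain-text nodes expose their content as .value
assert str(node) == "spam"
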
@@ -33,29 +33,32 @@ try:
except ImportError:
    CTokenizer = None

class _TestParseError(Exception):
    """Raised internally when a test could not be parsed."""

def _parse_test(test, data):
    """Parse an individual *test*, storing its info in *data*."""
    for line in test.strip().splitlines():
        if line.startswith("name:"):
            data["name"] = line[len("name:"):].strip()
            data["name"] = line[len("name:") :].strip()
        elif line.startswith("label:"):
            data["label"] = line[len("label:"):].strip()
            data["label"] = line[len("label:") :].strip()
        elif line.startswith("input:"):
            raw = line[len("input:"):].strip()
            raw = line[len("input:") :].strip()
            if raw[0] == '"' and raw[-1] == '"':
                raw = raw[1:-1]
            raw = raw.encode("raw_unicode_escape")
            data["input"] = raw.decode("unicode_escape")
        elif line.startswith("output:"):
            raw = line[len("output:"):].strip()
            raw = line[len("output:") :].strip()
            try:
                data["output"] = eval(raw, vars(tokens))
            except Exception as err:
                raise _TestParseError(err) from err

def _load_tests(filename, name, text):
    """Load all tests in *text* from the file *filename*."""
    tests = text.split("\n---\n")
@@ -77,15 +80,18 @@ def _load_tests(filename, name, text):
            warnings.warn(error.format(filename))
            continue
        if data["input"] is None or data["output"] is None:
            error = "Test '{}' in '{}' was ignored because it lacked an input or an output"
            error = (
                "Test '{}' in '{}' was ignored because it lacked an input or an output"
            )
            warnings.warn(error.format(data["name"], filename))
            continue
        # Include test filename in name
        data['name'] = '{}:{}'.format(name, data['name'])
        data["name"] = "{}:{}".format(name, data["name"])
        yield data

def build():
    """Load and install all tests from the 'tokenizer' directory."""
    directory = path.join(path.dirname(__file__), "tokenizer")
@@ -96,31 +102,37 @@ def build():
        fullname = path.join(directory, filename)
        with codecs.open(fullname, "r", encoding="utf8") as fp:
            text = fp.read()
            name = path.split(fullname)[1][:-len(extension)]
            name = path.split(fullname)[1][: -len(extension)]
            yield from _load_tests(fullname, name, text)

@pytest.mark.parametrize("tokenizer", filter(None, (
    CTokenizer, PyTokenizer
)), ids=lambda t: 'CTokenizer' if t.USES_C else 'PyTokenizer')
@pytest.mark.parametrize("data", build(), ids=lambda data: data['name'])
@pytest.mark.parametrize(
    "tokenizer",
    filter(None, (CTokenizer, PyTokenizer)),
    ids=lambda t: "CTokenizer" if t.USES_C else "PyTokenizer",
)
@pytest.mark.parametrize("data", build(), ids=lambda data: data["name"])
def test_tokenizer(tokenizer, data):
    expected = data["output"]
    actual = tokenizer().tokenize(data["input"])
    assert expected == actual

@pytest.mark.parametrize("data", build(), ids=lambda data: data['name'])
@pytest.mark.parametrize("data", build(), ids=lambda data: data["name"])
def test_roundtrip(data):
    expected = data["input"]
    actual = str(Builder().build(data["output"][:]))
    assert expected == actual

@pytest.mark.skipif(CTokenizer is None, reason='CTokenizer not available')
@pytest.mark.skipif(CTokenizer is None, reason="CTokenizer not available")
def test_c_tokenizer_uses_c():
    """make sure the C tokenizer identifies as using a C extension"""
    assert CTokenizer.USES_C is True
    assert CTokenizer().USES_C is True

def test_describe_context():
    assert "" == contexts.describe(0)
    ctx = contexts.describe(contexts.TEMPLATE_PARAM_KEY|contexts.HAS_TEXT)
    ctx = contexts.describe(contexts.TEMPLATE_PARAM_KEY | contexts.HAS_TEXT)
    assert "TEMPLATE_PARAM_KEY|HAS_TEXT" == ctx
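
Both tokenizers consume the same data-driven cases. A minimal direct call,
assuming the pure-Python Tokenizer import path behind this file's PyTokenizer
alias:

from mwparserfromhell.parser.tokenizer import Tokenizer  # assumed path
from mwparserfromhell.parser import tokens

actual = Tokenizer().tokenize("{{foo}}")
expected = [tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateClose()]
assert expected == actual   # the same shape the "output:" lines describe
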
@@ -26,6 +26,7 @@ import pytest
from mwparserfromhell.parser import tokens

@pytest.mark.parametrize("name", tokens.__all__)
def test_issubclass(name):
    """check that all classes within the tokens module are really Tokens"""
@@ -34,6 +35,7 @@ def test_issubclass(name):
    assert isinstance(klass(), klass)
    assert isinstance(klass(), tokens.Token)

def test_attributes():
    """check that Token attributes can be managed properly"""
    token1 = tokens.Token()
@@ -54,6 +56,7 @@ def test_attributes():
    with pytest.raises(KeyError):
        token2.__delattr__("baz")

def test_repr():
    """check that repr() on a Token works as expected"""
    token1 = tokens.Token()
@@ -65,6 +68,7 @@ def test_repr():
    assert repr(token2) in ("Token(foo='bar', baz=123)", "Token(baz=123, foo='bar')")
    assert "Text(text='" + hundredchars + "')" == repr(token3)

def test_equality():
    """check that equivalent tokens are considered equal"""
    token1 = tokens.Token()
@@ -83,11 +87,11 @@ def test_equality():
    assert token4 != token6
    assert token5 != token6

@pytest.mark.parametrize("token", [
    tokens.Token(),
    tokens.Token(foo="bar", baz=123),
    tokens.Text(text="earwig")
])
@pytest.mark.parametrize(
    "token",
    [tokens.Token(), tokens.Token(foo="bar", baz=123), tokens.Text(text="earwig")],
)
def test_repr_equality(token):
    """check that eval(repr(token)) == token"""
    assert token == eval(repr(token), vars(tokens))
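
The repr()/eval() roundtrip checked above, from the user's side:

from mwparserfromhell.parser import tokens

token = tokens.Text(text="earwig")
assert repr(token) == "Text(text='earwig')"
assert eval(repr(token), vars(tokens)) == token   # repr() output is evaluable
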
@@ -28,28 +28,33 @@ from mwparserfromhell.nodes import Template, Text
from mwparserfromhell.utils import parse_anything
from .conftest import assert_wikicode_equal, wrap, wraptext

@pytest.mark.parametrize("test,valid", [
    (wraptext("foobar"), wraptext("foobar")),
    (Template(wraptext("spam")), wrap([Template(wraptext("spam"))])),
    ("fóóbar", wraptext("fóóbar")),
    (b"foob\xc3\xa1r", wraptext("foobár")),
    (123, wraptext("123")),
    (True, wraptext("True")),
    (None, wrap([])),
    ([Text("foo"), Text("bar"), Text("baz")],
     wraptext("foo", "bar", "baz")),
    ([wraptext("foo"), Text("bar"), "baz", 123, 456],
     wraptext("foo", "bar", "baz", "123", "456")),
    ([[[([[((("foo",),),)], "bar"],)]]], wraptext("foo", "bar"))
])
@pytest.mark.parametrize(
    "test,valid",
    [
        (wraptext("foobar"), wraptext("foobar")),
        (Template(wraptext("spam")), wrap([Template(wraptext("spam"))])),
        ("fóóbar", wraptext("fóóbar")),
        (b"foob\xc3\xa1r", wraptext("foobár")),
        (123, wraptext("123")),
        (True, wraptext("True")),
        (None, wrap([])),
        ([Text("foo"), Text("bar"), Text("baz")], wraptext("foo", "bar", "baz")),
        (
            [wraptext("foo"), Text("bar"), "baz", 123, 456],
            wraptext("foo", "bar", "baz", "123", "456"),
        ),
        ([[[([[((("foo",),),)], "bar"],)]]], wraptext("foo", "bar")),
    ],
)
def test_parse_anything_valid(test, valid):
    """tests for valid input to utils.parse_anything()"""
    assert_wikicode_equal(valid, parse_anything(test))

@pytest.mark.parametrize("invalid", [
    Ellipsis, object, object(), type,
    ["foo", [object]]
])
@pytest.mark.parametrize(
    "invalid", [Ellipsis, object, object(), type, ["foo", [object]]]
)
def test_parse_anything_invalid(invalid):
    """tests for invalid input to utils.parse_anything()"""
    with pytest.raises(ValueError):
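
As the valid-input cases show, parse_anything() flattens arbitrarily nested
mixed input into a single Wikicode:

from mwparserfromhell.utils import parse_anything

code = parse_anything(["foo", 123, None, [b"bar", True]])
assert str(code) == "foo123barTrue"   # None contributes nothing
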
@@ -34,6 +34,7 @@ from mwparserfromhell.wikicode import Wikicode
from mwparserfromhell import parse
from .conftest import wrap, wraptext

def test_str():
    """test Wikicode.__str__()"""
    code1 = parse("foobar")
@@ -41,6 +42,7 @@ def test_str():
    assert "foobar" == str(code1)
    assert "Have a {{template}} and a [[page|link]]" == str(code2)

def test_nodes():
    """test getter/setter for the nodes attribute"""
    code = parse("Have a {{template}}")
@@ -57,6 +59,7 @@ def test_nodes():
    with pytest.raises(ValueError):
        code.__setattr__("nodes", object)

def test_get():
    """test Wikicode.get()"""
    code = parse("Have a {{template}} and a [[page|link]]")
@@ -65,6 +68,7 @@ def test_get():
    with pytest.raises(IndexError):
        code.get(4)

def test_set():
    """test Wikicode.set()"""
    code = parse("Have a {{template}} and a [[page|link]]")
@@ -82,6 +86,7 @@ def test_set():
    with pytest.raises(IndexError):
        code.set(-4, "{{baz}}")

def test_contains():
    """test Wikicode.contains()"""
    code = parse("Here is {{aaa|{{bbb|xyz{{ccc}}}}}} and a [[page|link]]")
@@ -93,6 +98,7 @@ def test_contains():
    assert code.contains(str(tmpl4)) is True
    assert code.contains(tmpl2.params[0].value) is True

def test_index():
    """test Wikicode.index()"""
    code = parse("Have a {{template}} and a [[page|link]]")
@@ -105,13 +111,13 @@ def test_index():
    code = parse("{{foo}}{{bar|{{baz}}}}")
    assert 1 == code.index("{{bar|{{baz}}}}")
    assert 1 == code.index("{{baz}}", recursive=True)
    assert 1 == code.index(code.get(1).get(1).value,
                           recursive=True)
    assert 1 == code.index(code.get(1).get(1).value, recursive=True)
    with pytest.raises(ValueError):
        code.index("{{baz}}", recursive=False)
    with pytest.raises(ValueError):
        code.index(code.get(1).get(1).value, recursive=False)

def test_get_ancestors_parent():
    """test Wikicode.get_ancestors() and Wikicode.get_parent()"""
    code = parse("{{a|{{b|{{d|{{e}}{{f}}}}{{g}}}}}}{{c}}")
@@ -130,6 +136,7 @@ def test_get_ancestors_parent():
    with pytest.raises(ValueError):
        code.get_parent(fake)

def test_insert():
    """test Wikicode.insert()"""
    code = parse("Have a {{template}} and a [[page|link]]")
@@ -144,14 +151,22 @@ def test_insert():
    code2 = parse("{{foo}}{{bar}}{{baz}}")
    code2.insert(1, "abc{{def}}ghi[[jk]]")
    assert "{{foo}}abc{{def}}ghi[[jk]]{{bar}}{{baz}}" == code2
    assert ["{{foo}}", "abc", "{{def}}", "ghi", "[[jk]]",
            "{{bar}}", "{{baz}}"] == code2.nodes
    assert [
        "{{foo}}",
        "abc",
        "{{def}}",
        "ghi",
        "[[jk]]",
        "{{bar}}",
        "{{baz}}",
    ] == code2.nodes
    code3 = parse("{{foo}}bar")
    code3.insert(1000, "[[baz]]")
    code3.insert(-1000, "derp")
    assert "derp{{foo}}bar[[baz]]" == code3

def _test_search(meth, expected):
    """Base test for insert_before(), insert_after(), and replace()."""
    code = parse("{{a}}{{b}}{{c}}{{d}}{{e}}")
@@ -249,6 +264,7 @@ def _test_search(meth, expected):
    meth(code9, code9.get_sections()[0], "{{quz}}")
    assert expected[8] == code9

def test_insert_before():
    """test Wikicode.insert_before()"""
    meth = lambda code, *args, **kw: code.insert_before(*args, **kw)
@@ -265,6 +281,7 @@ def test_insert_before():
    ]
    _test_search(meth, expected)

def test_insert_after():
    """test Wikicode.insert_after()"""
    meth = lambda code, *args, **kw: code.insert_after(*args, **kw)
@@ -281,6 +298,7 @@ def test_insert_after():
    ]
    _test_search(meth, expected)

def test_replace():
    """test Wikicode.replace()"""
    meth = lambda code, *args, **kw: code.replace(*args, **kw)
@@ -297,6 +315,7 @@ def test_replace():
    ]
    _test_search(meth, expected)

def test_append():
    """test Wikicode.append()"""
    code = parse("Have a {{template}}")
@@ -310,6 +329,7 @@ def test_append():
    with pytest.raises(ValueError):
        code.append(slice(0, 1))

def test_remove():
    """test Wikicode.remove()"""
    meth = lambda code, obj, value, **kw: code.remove(obj, **kw)
@@ -326,6 +346,7 @@ def test_remove():
    ]
    _test_search(meth, expected)

def test_matches():
    """test Wikicode.matches()"""
    code1 = parse("Cleanup")
@@ -357,17 +378,32 @@ def test_matches():
    assert code5.matches("<!-- nothing -->") is True
    assert code5.matches(("a", "b", "")) is True

def test_filter_family():
    """test the Wikicode.i?filter() family of functions"""
    def genlist(gen):
        assert isinstance(gen, GeneratorType)
        return list(gen)
    ifilter = lambda code: (lambda *a, **k: genlist(code.ifilter(*a, **k)))
    code = parse("a{{b}}c[[d]]{{{e}}}{{f}}[[g]]")
    for func in (code.filter, ifilter(code)):
        assert ["a", "{{b}}", "b", "c", "[[d]]", "d", "{{{e}}}",
                "e", "{{f}}", "f", "[[g]]", "g"] == func()
        assert [
            "a",
            "{{b}}",
            "b",
            "c",
            "[[d]]",
            "d",
            "{{{e}}}",
            "e",
            "{{f}}",
            "f",
            "[[g]]",
            "g",
        ] == func()
        assert ["{{{e}}}"] == func(forcetype=Argument)
        assert code.get(4) is func(forcetype=Argument)[0]
        assert list("abcdefg") == func(forcetype=Text)
@@ -377,7 +413,7 @@ def test_filter_family():
    funcs = [
        lambda name, **kw: getattr(code, "filter_" + name)(**kw),
        lambda name, **kw: genlist(getattr(code, "ifilter_" + name)(**kw))
        lambda name, **kw: genlist(getattr(code, "ifilter_" + name)(**kw)),
    ]
    for get_filter in funcs:
        assert ["{{{e}}}"] == get_filter("arguments")
@@ -393,27 +429,35 @@ def test_filter_family():
    code2 = parse("{{a|{{b}}|{{c|d={{f}}{{h}}}}}}")
    for func in (code2.filter, ifilter(code2)):
        assert ["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}"] \
            == func(recursive=False, forcetype=Template)
        assert ["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}", "{{b}}",
                "{{c|d={{f}}{{h}}}}", "{{f}}", "{{h}}"] \
            == func(recursive=True, forcetype=Template)
        assert ["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}"] == func(
            recursive=False, forcetype=Template
        )
        assert [
            "{{a|{{b}}|{{c|d={{f}}{{h}}}}}}",
            "{{b}}",
            "{{c|d={{f}}{{h}}}}",
            "{{f}}",
            "{{h}}",
        ] == func(recursive=True, forcetype=Template)
    code3 = parse("{{foobar}}{{FOO}}{{baz}}{{bz}}{{barfoo}}")
    for func in (code3.filter, ifilter(code3)):
        assert ["{{foobar}}", "{{barfoo}}"] \
            == func(False, matches=lambda node: "foo" in node)
        assert ["{{foobar}}", "{{FOO}}", "{{barfoo}}"] \
            == func(False, matches=r"foo")
        assert ["{{foobar}}", "{{FOO}}"] \
            == func(matches=r"^{{foo.*?}}")
        assert ["{{foobar}}"] \
            == func(matches=r"^{{foo.*?}}", flags=re.UNICODE)
        assert ["{{foobar}}", "{{barfoo}}"] == func(
            False, matches=lambda node: "foo" in node
        )
        assert ["{{foobar}}", "{{FOO}}", "{{barfoo}}"] == func(False, matches=r"foo")
        assert ["{{foobar}}", "{{FOO}}"] == func(matches=r"^{{foo.*?}}")
        assert ["{{foobar}}"] == func(matches=r"^{{foo.*?}}", flags=re.UNICODE)
        assert ["{{baz}}", "{{bz}}"] == func(matches=r"^{{b.*?z")
        assert ["{{baz}}"] == func(matches=r"^{{b.+?z}}")
    exp_rec = ["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}", "{{b}}",
               "{{c|d={{f}}{{h}}}}", "{{f}}", "{{h}}"]
    exp_rec = [
        "{{a|{{b}}|{{c|d={{f}}{{h}}}}}}",
        "{{b}}",
        "{{c|d={{f}}{{h}}}}",
        "{{f}}",
        "{{h}}",
    ]
    exp_unrec = ["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}"]
    assert exp_rec == code2.filter_templates()
    assert exp_unrec == code2.filter_templates(recursive=False)
@@ -422,9 +466,9 @@ def test_filter_family():
    assert exp_unrec == code2.filter_templates(False)
    assert ["{{foobar}}"] == code3.filter_templates(
        matches=lambda node: node.name.matches("Foobar"))
    assert ["{{baz}}", "{{bz}}"] \
        == code3.filter_templates(matches=r"^{{b.*?z")
        matches=lambda node: node.name.matches("Foobar")
    )
    assert ["{{baz}}", "{{bz}}"] == code3.filter_templates(matches=r"^{{b.*?z")
    assert [] == code3.filter_tags(matches=r"^{{b.*?z")
    assert [] == code3.filter_tags(matches=r"^{{b.*?z", flags=0)
    with pytest.raises(TypeError):
@@ -440,6 +484,7 @@ def test_filter_family():
    assert ["{{foo}}", "{{foo|{{bar}}}}"] == actual1
    assert ["{{foo}}", "{{foo|{{bar}}}}"] == actual2
def test_get_sections(): | |||
"""test Wikicode.get_sections()""" | |||
page1 = parse("") | |||
@@ -461,44 +506,70 @@ def test_get_sections():
     assert [""] == page1.get_sections()
     assert ["", "==Heading=="] == page2.get_sections()
-    assert ["", "===Heading===\nFoo bar baz\n====Gnidaeh====\n", "====Gnidaeh====\n"] \
-           == page3.get_sections()
-    assert [p4_lead, p4_I, p4_IA, p4_IB, p4_IB1, p4_II,
-            p4_III, p4_IIIA, p4_IIIA1a, p4_IIIA2, p4_IIIA2ai1] \
-           == page4.get_sections()
+    assert [
+        "",
+        "===Heading===\nFoo bar baz\n====Gnidaeh====\n",
+        "====Gnidaeh====\n",
+    ] == page3.get_sections()
+    assert [
+        p4_lead,
+        p4_I,
+        p4_IA,
+        p4_IB,
+        p4_IB1,
+        p4_II,
+        p4_III,
+        p4_IIIA,
+        p4_IIIA1a,
+        p4_IIIA2,
+        p4_IIIA2ai1,
+    ] == page4.get_sections()
     assert ["====Gnidaeh====\n"] == page3.get_sections(levels=[4])
-    assert ["===Heading===\nFoo bar baz\n====Gnidaeh====\n"] \
-           == page3.get_sections(levels=(2, 3))
-    assert ["===Heading===\nFoo bar baz\n"] \
-           == page3.get_sections(levels=(2, 3), flat=True)
+    assert ["===Heading===\nFoo bar baz\n====Gnidaeh====\n"] == page3.get_sections(
+        levels=(2, 3)
+    )
+    assert ["===Heading===\nFoo bar baz\n"] == page3.get_sections(
+        levels=(2, 3), flat=True
+    )
     assert [] == page3.get_sections(levels=[0])
-    assert ["", "====Gnidaeh====\n"] == page3.get_sections(levels=[4], include_lead=True)
-    assert ["===Heading===\nFoo bar baz\n====Gnidaeh====\n",
-            "====Gnidaeh====\n"] == page3.get_sections(include_lead=False)
-    assert ["===Heading===\nFoo bar baz\n", "====Gnidaeh====\n"] \
-           == page3.get_sections(flat=True, include_lead=False)
+    assert ["", "====Gnidaeh====\n"] == page3.get_sections(
+        levels=[4], include_lead=True
+    )
+    assert [
+        "===Heading===\nFoo bar baz\n====Gnidaeh====\n",
+        "====Gnidaeh====\n",
+    ] == page3.get_sections(include_lead=False)
+    assert ["===Heading===\nFoo bar baz\n", "====Gnidaeh====\n"] == page3.get_sections(
+        flat=True, include_lead=False
+    )
     assert [p4_IB1, p4_IIIA2] == page4.get_sections(levels=[4])
     assert [p4_IA, p4_IB, p4_IIIA] == page4.get_sections(levels=[3])
-    assert [p4_IA, "=== Section I.B ===\n",
-            "=== Section III.A ===\nText.\n"] \
-           == page4.get_sections(levels=[3], flat=True)
+    assert [
+        p4_IA,
+        "=== Section I.B ===\n",
+        "=== Section III.A ===\nText.\n",
+    ] == page4.get_sections(levels=[3], flat=True)
     assert ["", ""] == page2.get_sections(include_headings=False)
-    assert ["\nSection I.B.1 body.\n\n•Some content.\n\n",
-            "\nEven more text.\n" + p4_IIIA2ai1] \
-           == page4.get_sections(levels=[4], include_headings=False)
+    assert [
+        "\nSection I.B.1 body.\n\n•Some content.\n\n",
+        "\nEven more text.\n" + p4_IIIA2ai1,
+    ] == page4.get_sections(levels=[4], include_headings=False)
     assert [] == page4.get_sections(matches=r"body")
-    assert [p4_I, p4_IA, p4_IB, p4_IB1] \
-           == page4.get_sections(matches=r"Section\sI[.\s].*?")
-    assert [p4_IA, p4_IIIA, p4_IIIA1a, p4_IIIA2, p4_IIIA2ai1] \
-           == page4.get_sections(matches=r".*?a.*?")
-    assert [p4_IIIA1a, p4_IIIA2ai1] \
-           == page4.get_sections(matches=r".*?a.*?", flags=re.U)
-    assert ["\nMore text.\n", "\nAn invalid section!"] \
-           == page4.get_sections(matches=r".*?a.*?", flags=re.U,
-                                 include_headings=False)
+    assert [p4_I, p4_IA, p4_IB, p4_IB1] == page4.get_sections(
+        matches=r"Section\sI[.\s].*?"
+    )
+    assert [p4_IA, p4_IIIA, p4_IIIA1a, p4_IIIA2, p4_IIIA2ai1] == page4.get_sections(
+        matches=r".*?a.*?"
+    )
+    assert [p4_IIIA1a, p4_IIIA2ai1] == page4.get_sections(
+        matches=r".*?a.*?", flags=re.U
+    )
+    assert ["\nMore text.\n", "\nAn invalid section!"] == page4.get_sections(
+        matches=r".*?a.*?", flags=re.U, include_headings=False
+    )
     sections = page2.get_sections(include_headings=False)
     sections[0].append("Lead!\n")
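As a reading aid for the hunk above, a short sketch of the get_sections() behaviour being asserted; the page3 fixture is inferred from the expected section strings (a level-3 heading containing a level-4 subsection):

    import mwparserfromhell

    # Inferred from the asserts above; the real fixture lives earlier in the file.
    page3 = mwparserfromhell.parse("===Heading===\nFoo bar baz\n====Gnidaeh====\n")
    print(page3.get_sections(levels=[4]))
    # ['====Gnidaeh====\n']
    print(page3.get_sections(levels=(2, 3), flat=True))
    # ['===Heading===\nFoo bar baz\n'] -- flat=True drops the nested level-4 part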
@@ -512,22 +583,22 @@ def test_get_sections():
     assert "== Foo ==\nBarf {{Haha}}\n" == section
     assert "X\n== Foo ==\nBarf {{Haha}}\n== Baz ==\nBuzz" == page5
 
+
 def test_strip_code():
     """test Wikicode.strip_code()"""
     # Since individual nodes have test cases for their __strip__ methods,
     # we're only going to do an integration test:
     code = parse("Foo [[bar]]\n\n{{baz|hello}}\n\n[[a|b]] Σ")
-    assert "Foo bar\n\nb Σ" \
-           == code.strip_code(normalize=True, collapse=True)
-    assert "Foo bar\n\n\n\nb Σ" \
-           == code.strip_code(normalize=True, collapse=False)
-    assert "Foo bar\n\nb Σ" \
-           == code.strip_code(normalize=False, collapse=True)
-    assert "Foo bar\n\n\n\nb Σ" \
-           == code.strip_code(normalize=False, collapse=False)
-    assert "Foo bar\n\nhello\n\nb Σ" \
-           == code.strip_code(normalize=True, collapse=True,
-                              keep_template_params=True)
+    assert "Foo bar\n\nb Σ" == code.strip_code(normalize=True, collapse=True)
+    assert "Foo bar\n\n\n\nb Σ" == code.strip_code(normalize=True, collapse=False)
+    assert "Foo bar\n\nb Σ" == code.strip_code(normalize=False, collapse=True)
+    assert "Foo bar\n\n\n\nb Σ" == code.strip_code(
+        normalize=False, collapse=False
+    )
+    assert "Foo bar\n\nhello\n\nb Σ" == code.strip_code(
+        normalize=True, collapse=True, keep_template_params=True
+    )
 
+
 def test_get_tree():
     """test Wikicode.get_tree()"""
@@ -535,6 +606,8 @@ def test_get_tree():
     # methods, and the docstring covers all possibilities for the output of
     # __showtree__, we'll test it only:
     code = parse("Lorem ipsum {{foo|bar|{{baz}}|spam=eggs}}")
-    expected = "Lorem ipsum \n{{\n\t foo\n\t| 1\n\t= bar\n\t| 2\n\t= " + \
-               "{{\n\t\t\tbaz\n\t }}\n\t| spam\n\t= eggs\n}}"
+    expected = (
+        "Lorem ipsum \n{{\n\t foo\n\t| 1\n\t= bar\n\t| 2\n\t= "
+        + "{{\n\t\t\tbaz\n\t }}\n\t| spam\n\t= eggs\n}}"
+    )
     assert expected.expandtabs(4) == code.get_tree()
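The hunk above only re-wraps the expected string. For readers decoding it, get_tree() renders the parse tree one token per line; a sketch, with the commented output being the expected string after tabs expand to 4 spaces:

    import mwparserfromhell

    code = mwparserfromhell.parse("Lorem ipsum {{foo|bar|{{baz}}|spam=eggs}}")
    print(code.get_tree())
    # Lorem ipsum
    # {{
    #      foo
    #     | 1
    #     = bar
    #     | 2
    #     = {{
    #             baz
    #      }}
    #     | spam
    #     = eggs
    # }}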
@@ -27,6 +27,7 @@ import pytest
 from mwparserfromhell.nodes import Text, Wikilink
 from .conftest import assert_wikicode_equal, wrap, wraptext
 
+
 def test_str():
     """test Wikilink.__str__()"""
     node = Wikilink(wraptext("foobar"))
@@ -34,6 +35,7 @@ def test_str():
     node2 = Wikilink(wraptext("foo"), wraptext("bar"))
     assert "[[foo|bar]]" == str(node2)
 
+
 def test_children():
     """test Wikilink.__children__()"""
     node1 = Wikilink(wraptext("foobar"))
@@ -48,6 +50,7 @@ def test_children():
     with pytest.raises(StopIteration):
         next(gen2)
 
+
 def test_strip():
     """test Wikilink.__strip__()"""
     node = Wikilink(wraptext("foobar"))
@@ -55,6 +58,7 @@ def test_strip():
     assert "foobar" == node.__strip__()
     assert "bar" == node2.__strip__()
 
+
 def test_showtree():
     """test Wikilink.__showtree__()"""
     output = []
@@ -66,10 +70,19 @@ def test_showtree():
     node1.__showtree__(output.append, get, mark)
     node2.__showtree__(output.append, get, mark)
     valid = [
-        "[[", (getter, node1.title), "]]", "[[", (getter, node2.title),
-        " | ", marker, (getter, node2.text), "]]"]
+        "[[",
+        (getter, node1.title),
+        "]]",
+        "[[",
+        (getter, node2.title),
+        " | ",
+        marker,
+        (getter, node2.text),
+        "]]",
+    ]
     assert valid == output
 
+
 def test_title():
     """test getter/setter for the title attribute"""
     title = wraptext("foobar")
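For context on the protocol asserted above: __showtree__(write, get, mark) emits literal markup through write, hands nested Wikicode (the link's title and text) to get, and calls mark after the " | " separator. A minimal sketch with sentinel stand-ins; the real getter/marker and helpers come from the test's conftest:

    import mwparserfromhell

    output = []
    getter, marker = object(), object()  # stand-ins, not the conftest fixtures

    node = mwparserfromhell.parse("[[foo|bar]]").nodes[0]  # a Wikilink node
    node.__showtree__(
        output.append,                               # write: literal fragments
        lambda code: output.append((getter, code)),  # get: nested Wikicode
        lambda: output.append(marker),               # mark: separator marker
    )
    # output mirrors the `valid` sequence asserted above:
    # ["[[", (getter, <title>), " | ", marker, (getter, <text>), "]]"]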
@@ -82,6 +95,7 @@ def test_title():
     assert_wikicode_equal(wraptext("héhehé"), node1.title)
     assert_wikicode_equal(wraptext("héhehé"), node2.title)
 
+
 def test_text():
     """test getter/setter for the text attribute"""
     text = wraptext("baz")