From dcf7ba4e79e8c606f6380846b4ef9b5116b3c942 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 16 May 2021 02:37:44 -0400 Subject: [PATCH 1/7] Version bump --- CHANGELOG | 4 ++++ appveyor.yml | 2 +- docs/changelog.rst | 8 ++++++++ src/mwparserfromhell/__init__.py | 2 +- 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 0ae6fcf..cb519f4 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,7 @@ +v0.7 (unreleased): + +- ... + v0.6.2 (released May 16, 2021): - Improved parsing of external links. (#232) diff --git a/appveyor.yml b/appveyor.yml index 85bca1c..5f89a41 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,6 +1,6 @@ # This config file is used by appveyor.com to build Windows release binaries -version: 0.6.2-b{build} +version: 0.7.dev0-b{build} branches: only: diff --git a/docs/changelog.rst b/docs/changelog.rst index 827cb01..74fdf60 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,6 +1,14 @@ Changelog ========= +v0.7 +---- + +Unreleased +(`changes `__): + +- ... + v0.6.2 ------ diff --git a/src/mwparserfromhell/__init__.py b/src/mwparserfromhell/__init__.py index 917d26c..dd4eb2b 100644 --- a/src/mwparserfromhell/__init__.py +++ b/src/mwparserfromhell/__init__.py @@ -27,7 +27,7 @@ outrageously powerful parser for `MediaWiki `_ wikico __author__ = "Ben Kurtovic" __copyright__ = "Copyright (C) 2012-2021 Ben Kurtovic" __license__ = "MIT License" -__version__ = "0.6.2" +__version__ = "0.7.dev0" __email__ = "ben.kurtovic@gmail.com" from . import (definitions, nodes, parser, smart_list, string_mixin, From 3a9d63fccb959d891386da08b7cad06b81f370cc Mon Sep 17 00:00:00 2001 From: odidev Date: Tue, 10 Aug 2021 17:56:37 +0530 Subject: [PATCH 2/7] Add linux aarch64 wheel build support (#276) Signed-off-by: odidev --- .github/workflows/build-linux-wheels.yml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/.github/workflows/build-linux-wheels.yml b/.github/workflows/build-linux-wheels.yml index 272932c..7b1f0f0 100644 --- a/.github/workflows/build-linux-wheels.yml +++ b/.github/workflows/build-linux-wheels.yml @@ -23,3 +23,25 @@ jobs: with: user: __token__ password: ${{ secrets.pypi_password }} + build_aarch64: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: docker/setup-qemu-action@v1 + name: Set up QEMU + - name: Build manylinux aarch64 Python wheels + uses: RalfG/python-wheels-manylinux-build@v0.3.4-manylinux2014_aarch64 + with: + python-versions: 'cp36-cp36m cp37-cp37m cp38-cp38 cp39-cp39' + pip-wheel-args: '-w ./wheelhouse --no-deps' + - name: Move to dist/ + run: | + mkdir -p dist + cp -v wheelhouse/*-manylinux*.whl dist/ + - name: Publish package to PyPI + # Only actually publish if a new tag was pushed + if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') + uses: pypa/gh-action-pypi-publish@37e305e7413032d8422456179fee28fac7d25187 + with: + user: __token__ + password: ${{ secrets.pypi_password }} From 10e63cd45234680aa5b04393c580eef790441fbc Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 23 Aug 2021 01:25:59 -0400 Subject: [PATCH 3/7] Update changelog --- CHANGELOG | 2 +- docs/changelog.rst | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index cb519f4..6dbd975 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,6 @@ v0.7 (unreleased): -- ... +- Added Linux AArch64 wheels. 
(#276) v0.6.2 (released May 16, 2021): diff --git a/docs/changelog.rst b/docs/changelog.rst index 74fdf60..ebb2482 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -7,7 +7,8 @@ v0.7 Unreleased (`changes `__): -- ... +- Added Linux AArch64 wheels. + (`#276 `_) v0.6.2 ------ From 911d7e5f887e141ba3cc176efec0cd388cc2de03 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 1 Sep 2021 00:54:53 -0400 Subject: [PATCH 4/7] Clean up external links parsing logic and fix integer conversion --- .gitignore | 1 + CHANGELOG | 2 + docs/changelog.rst | 3 + src/mwparserfromhell/parser/ctokenizer/tok_parse.c | 128 +++++---------------- src/mwparserfromhell/parser/ctokenizer/tokenizer.c | 6 +- src/mwparserfromhell/parser/tokenizer.py | 95 +++++++-------- 6 files changed, 83 insertions(+), 152 deletions(-) diff --git a/.gitignore b/.gitignore index 930f0bf..0a03112 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,4 @@ docs/_build scripts/*.log htmlcov/ .idea/ +.pytest_cache/ diff --git a/CHANGELOG b/CHANGELOG index 6dbd975..09d14e7 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,8 @@ v0.7 (unreleased): - Added Linux AArch64 wheels. (#276) +- Fixed C integer conversion, manifesting as parsing errors on big-endian + platforms. (#277) v0.6.2 (released May 16, 2021): diff --git a/docs/changelog.rst b/docs/changelog.rst index ebb2482..7fe93dc 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -9,6 +9,9 @@ Unreleased - Added Linux AArch64 wheels. (`#276 `_) +- Fixed C integer conversion, manifesting as parsing errors on big-endian + platforms. + (`#277 `_) v0.6.2 ------ diff --git a/src/mwparserfromhell/parser/ctokenizer/tok_parse.c b/src/mwparserfromhell/parser/ctokenizer/tok_parse.c index 6e9022d..740e9bf 100644 --- a/src/mwparserfromhell/parser/ctokenizer/tok_parse.c +++ b/src/mwparserfromhell/parser/ctokenizer/tok_parse.c @@ -1,5 +1,5 @@ /* -Copyright (C) 2012-2019 Ben Kurtovic +Copyright (C) 2012-2021 Ben Kurtovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in @@ -100,66 +100,6 @@ static PyObject* strip_tag_name(PyObject* token, int take_attr) } /* - Check if the given character is a non-word character. - - Equivalent to this Python code: - - def is_non_word_character(ch): - if re.fullmatch(r"\W", chunk): - return True - return False -*/ -static int is_non_word_character(Py_UCS4 ch) -{ - int ret = 0; - PyObject* modname = NULL; - PyObject* module = NULL; - PyObject* fmatch = NULL; - PyObject* pattern = NULL; - PyObject* str = NULL; - PyObject* posArgs = NULL; - PyObject* match = NULL; - - modname = PyUnicode_FromString("re"); - if (modname == NULL) - goto error; - module = PyImport_Import(modname); - if (module == NULL) - goto error; - fmatch = PyObject_GetAttrString(module, "fullmatch"); - if (fmatch == NULL) - goto error; - pattern = PyUnicode_FromString("\\W"); - if (pattern == NULL) - goto error; - str = PyUnicode_FROM_SINGLE(ch); - if (str == NULL) - goto error; - posArgs = PyTuple_Pack(2, pattern, str); - if (posArgs == NULL) - goto error; - match = PyObject_Call(fmatch, posArgs, NULL); - if (match == NULL) - goto error; - - if (match != Py_None) - ret = 1; - goto end; - - error: - ret = -1; - end: - Py_XDECREF(match); - Py_XDECREF(posArgs); - Py_XDECREF(str); - Py_XDECREF(pattern); - Py_XDECREF(fmatch); - Py_XDECREF(module); - Py_XDECREF(modname); - return ret; -} - -/* Parse a template at the head of the wikicode string. 
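 
     For example, a simple input like "{{foo|bar}}" should yield roughly
     this token stream (token names are from mwparserfromhell.parser.tokens;
     the exact Text chunking shown here is an assumption):
 
         TemplateOpen, Text("foo"), TemplateParamSeparator,
         Text("bar"), TemplateClose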
*/ static int Tokenizer_parse_template(Tokenizer* self, int has_content) @@ -576,7 +516,7 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self) static const char* valid = URISCHEME; Textbuffer *scheme_buffer = Textbuffer_new(&self->text); PyObject *scheme; - Py_UCS4 chunk; + Py_UCS4 ch; Py_ssize_t i; int slashes, j; uint64_t new_context; @@ -586,15 +526,10 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self) // We have to backtrack through the textbuffer looking for our scheme since // it was just parsed as text: for (i = self->topstack->textbuffer->length - 1; i >= 0; i--) { - chunk = Textbuffer_read(self->topstack->textbuffer, i); - // stop at the first non-word character - int is_non_word = is_non_word_character(chunk); - if (is_non_word < 0) { - Textbuffer_dealloc(scheme_buffer); - return -1; - } - else if (is_non_word == 1) - goto end_of_loop; + ch = Textbuffer_read(self->topstack->textbuffer, i); + // Stop at the first non-word character (equivalent to \W in regex) + if (!Py_UNICODE_ISALNUM(ch) && ch != '_') + break; j = 0; do { if (!valid[j]) { @@ -602,10 +537,9 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self) FAIL_ROUTE(0); return 0; } - } while (chunk != (Py_UCS4) valid[j++]); - Textbuffer_write(scheme_buffer, chunk); + } while (ch != (Py_UCS4) valid[j++]); + Textbuffer_write(scheme_buffer, ch); } - end_of_loop: Textbuffer_reverse(scheme_buffer); scheme = Textbuffer_render(scheme_buffer); if (!scheme) { @@ -670,17 +604,17 @@ static int Tokenizer_handle_free_link_text( } /* - Return whether the current head is the end of a free link. + Return whether the current head is the end of a URI. */ static int -Tokenizer_is_free_link_end(Tokenizer* self, Py_UCS4 this, Py_UCS4 next) +Tokenizer_is_uri_end(Tokenizer* self, Py_UCS4 this, Py_UCS4 next) { // Built from Tokenizer_parse()'s end sentinels: Py_UCS4 after = Tokenizer_read(self, 2); uint64_t ctx = self->topstack->context; return (!this || this == '\n' || this == '[' || this == ']' || - this == '<' || this == '>' || this == '"' || + this == '<' || this == '>' || this == '"' || this == ' ' || (this == '\'' && next == '\'') || (this == '|' && ctx & LC_TEMPLATE) || (this == '=' && ctx & (LC_TEMPLATE_PARAM_KEY | LC_HEADING)) || @@ -723,50 +657,48 @@ Tokenizer_really_parse_external_link(Tokenizer* self, int brackets, if (Tokenizer_parse_comment(self)) return NULL; } - else if (!brackets && Tokenizer_is_free_link_end(self, this, next)) { - self->head--; - return Tokenizer_pop(self); - } - else if (!this || this == '\n') - return Tokenizer_fail_route(self); else if (this == '{' && next == '{' && Tokenizer_CAN_RECURSE(self)) { PUSH_TAIL_BUFFER(extra, NULL) if (Tokenizer_parse_template_or_argument(self)) return NULL; } - else if (this == ']') - return Tokenizer_pop(self); - else if (this == ' ' || Tokenizer_is_free_link_end(self, this, next)) { - if (brackets) { + else if (brackets) { + if (!this || this == '\n') + return Tokenizer_fail_route(self); + if (this == ']') + return Tokenizer_pop(self); + if (Tokenizer_is_uri_end(self, this, next)) { if (this == ' ') { if (Tokenizer_emit(self, ExternalLinkSeparator)) return NULL; + self->head++; } else { PyObject* kwargs = PyDict_New(); if (!kwargs) return NULL; - if (this != ' ') - PyDict_SetItemString(kwargs, "suppress_space", Py_True); + PyDict_SetItemString(kwargs, "suppress_space", Py_True); if (Tokenizer_emit_kwargs(self, ExternalLinkSeparator, kwargs)) return NULL; } self->topstack->context ^= LC_EXT_LINK_URI; self->topstack->context |= LC_EXT_LINK_TITLE; - if 
(this == ' ') - self->head++; return Tokenizer_parse(self, 0, 0); } - if (Textbuffer_write(extra, this)) - return NULL; - return Tokenizer_pop(self); - } - else if (!brackets) { - if (Tokenizer_handle_free_link_text(self, &parens, extra, this)) + if (Tokenizer_emit_char(self, this)) return NULL; } else { - if (Tokenizer_emit_char(self, this)) + if (Tokenizer_is_uri_end(self, this, next)) { + if (this == ' ') { + if (Textbuffer_write(extra, this)) + return NULL; + } + else + self->head--; + return Tokenizer_pop(self); + } + if (Tokenizer_handle_free_link_text(self, &parens, extra, this)) return NULL; } self->head++; diff --git a/src/mwparserfromhell/parser/ctokenizer/tokenizer.c b/src/mwparserfromhell/parser/ctokenizer/tokenizer.c index a501032..066f527 100644 --- a/src/mwparserfromhell/parser/ctokenizer/tokenizer.c +++ b/src/mwparserfromhell/parser/ctokenizer/tokenizer.c @@ -129,10 +129,10 @@ static int load_tokenizer_text(TokenizerInput* text, PyObject *input) static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args) { PyObject *input, *tokens; - uint64_t context = 0; + unsigned long long context = 0; int skip_style_tags = 0; - if (PyArg_ParseTuple(args, "U|ii", &input, &context, &skip_style_tags)) { + if (PyArg_ParseTuple(args, "U|Kp", &input, &context, &skip_style_tags)) { Py_INCREF(input); if (load_tokenizer_text(&self->text, input)) return NULL; @@ -143,7 +143,7 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args) /* Failed to parse a Unicode object; try a string instead. */ PyErr_Clear(); - if (!PyArg_ParseTuple(args, "s#|ii", &encoded, &size, &context, + if (!PyArg_ParseTuple(args, "s#|Kp", &encoded, &size, &context, &skip_style_tags)) return NULL; if (!(input = PyUnicode_FromStringAndSize(encoded, size))) diff --git a/src/mwparserfromhell/parser/tokenizer.py b/src/mwparserfromhell/parser/tokenizer.py index 76efd9b..efac02c 100644 --- a/src/mwparserfromhell/parser/tokenizer.py +++ b/src/mwparserfromhell/parser/tokenizer.py @@ -1,4 +1,4 @@ -# Copyright (C) 2012-2020 Ben Kurtovic +# Copyright (C) 2012-2021 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -60,8 +60,9 @@ class Tokenizer: USES_C = False START = object() END = object() - MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "'", "#", "*", ";", + MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "'", '"', "#", "*", ";", ":", "/", "-", "!", "\n", START, END] + URISCHEME = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-" MAX_DEPTH = 40 regex = re.compile(r"([{}\[\]<>|=&'#*;:/\\\"\-!\n])", flags=re.IGNORECASE) tag_splitter = re.compile(r"([\s\"\'\\]+)") @@ -323,7 +324,7 @@ class Tokenizer: self._head += 2 try: # If the wikilink looks like an external link, parse it as such: - link, _extra, _delta = self._really_parse_external_link(True) + link, _extra = self._really_parse_external_link(True) except BadRoute: self._head = reset + 1 try: @@ -366,8 +367,7 @@ class Tokenizer: self._emit_text("//") self._head += 2 else: - valid = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-" - all_valid = lambda: all(char in valid for char in self._read()) + all_valid = lambda: all(char in self.URISCHEME for char in self._read()) scheme = "" while self._read() is not self.END and all_valid(): scheme += self._read() @@ -386,17 +386,16 @@ class Tokenizer: def _parse_free_uri_scheme(self): """Parse the URI scheme of a free (no brackets) external 
link.""" - valid = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-" scheme = [] try: # We have to backtrack through the textbuffer looking for our # scheme since it was just parsed as text: for chunk in reversed(self._textbuffer): for char in reversed(chunk): - # stop at the first non-word character + # Stop at the first non-word character if re.fullmatch(r"\W", char): raise StopIteration() - if char not in valid: + if char not in self.URISCHEME: raise BadRoute() scheme.append(char) except StopIteration: @@ -434,15 +433,15 @@ class Tokenizer: self._emit_text(this) return punct, tail - def _is_free_link_end(self, this, nxt): - """Return whether the current head is the end of a free link.""" + def _is_uri_end(self, this, nxt): + """Return whether the current head is the end of a URI.""" # Built from _parse()'s end sentinels: after, ctx = self._read(2), self._context - equal_sign_contexts = contexts.TEMPLATE_PARAM_KEY | contexts.HEADING - return (this in (self.END, "\n", "[", "]", "<", ">", "\"") or + return (this in (self.END, "\n", "[", "]", "<", ">", '"') or + " " in this or this == nxt == "'" or (this == "|" and ctx & contexts.TEMPLATE) or - (this == "=" and ctx & equal_sign_contexts) or + (this == "=" and ctx & (contexts.TEMPLATE_PARAM_KEY | contexts.HEADING)) or (this == nxt == "}" and ctx & contexts.TEMPLATE) or (this == nxt == after == "}" and ctx & contexts.ARGUMENT)) @@ -451,6 +450,7 @@ class Tokenizer: if brackets: self._parse_bracketed_uri_scheme() invalid = ("\n", " ", "]") + punct = () else: self._parse_free_uri_scheme() invalid = ("\n", " ", "[", "]") @@ -465,53 +465,47 @@ class Tokenizer: self._emit_text(tail) tail = "" self._parse_entity() - elif (this == "<" and nxt == "!" and self._read(2) == - self._read(3) == "-"): + elif this == "<" and nxt == "!" 
and self._read(2) == self._read(3) == "-": if tail: self._emit_text(tail) tail = "" self._parse_comment() - elif not brackets and self._is_free_link_end(this, nxt): - return self._pop(), tail, -1 - elif this is self.END or this == "\n": - self._fail_route() elif this == nxt == "{" and self._can_recurse(): if tail: self._emit_text(tail) tail = "" self._parse_template_or_argument() - elif this == "]": - return self._pop(), tail, 0 - elif this == "'" and nxt == "'": - separator = tokens.ExternalLinkSeparator() - separator.suppress_space = True - self._emit(separator) - self._context ^= contexts.EXT_LINK_URI - self._context |= contexts.EXT_LINK_TITLE - return self._parse(push=False), None, 0 - elif any(ch in this for ch in (" ", "\n", "[", "]", "<", ">", - "\"")): - before, after = re.split(r"[ \n[\]<>\"]", this, maxsplit=1) - delimiter = this[len(before)] - if brackets: - self._emit_text(before) - separator = tokens.ExternalLinkSeparator() - if delimiter != " ": + elif brackets: + if this is self.END or this == "\n": + self._fail_route() + if this == "]": + return self._pop(), None + if self._is_uri_end(this, nxt): + if " " in this: + before, after = this.split(" ", 1) + self._emit_text(before) + self._emit(tokens.ExternalLinkSeparator()) + if after: + self._emit_text(after) + self._head += 1 + else: + separator = tokens.ExternalLinkSeparator() separator.suppress_space = True - self._emit(separator) - if after: - self._emit_text(after) + self._emit(separator) self._context ^= contexts.EXT_LINK_URI self._context |= contexts.EXT_LINK_TITLE - if delimiter == " ": - self._head += 1 - return self._parse(push=False), None, 0 - punct, tail = self._handle_free_link_text(punct, tail, before) - return self._pop(), tail + " " + after, 0 - elif not brackets: - punct, tail = self._handle_free_link_text(punct, tail, this) - else: + return self._parse(push=False), None self._emit_text(this) + else: + if self._is_uri_end(this, nxt): + if this is not self.END and " " in this: + before, after = this.split(" ", 1) + punct, tail = self._handle_free_link_text(punct, tail, before) + tail += " " + after + else: + self._head -= 1 + return self._pop(), tail + punct, tail = self._handle_free_link_text(punct, tail, this) self._head += 1 def _remove_uri_scheme_from_textbuffer(self, scheme): @@ -536,7 +530,7 @@ class Tokenizer: reset = self._head self._head += 1 try: - link, extra, delta = self._really_parse_external_link(brackets) + link, extra = self._really_parse_external_link(brackets) except BadRoute: self._head = reset if not brackets and self._context & contexts.DL_TERM: @@ -550,7 +544,6 @@ class Tokenizer: self._emit(tokens.ExternalLinkOpen(brackets=brackets)) self._emit_all(link) self._emit(tokens.ExternalLinkClose()) - self._head += delta if extra: self._emit_text(extra) @@ -854,8 +847,8 @@ class Tokenizer: depth -= 1 if depth == 0: # pragma: no cover (untestable/exceptional) raise ParserError( - "_handle_single_tag_end() got an unexpected " - "TagCloseSelfclose") + "_handle_single_tag_end() got an unexpected TagCloseSelfclose" + ) else: # pragma: no cover (untestable/exceptional case) raise ParserError("_handle_single_tag_end() missed a TagCloseOpen") padding = stack[index].padding From 8cd0bdb322b0de1a8e41097eb674ac8671c99bbd Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 2 Sep 2021 01:01:22 -0400 Subject: [PATCH 5/7] Autoformat: black + clang-format + clang-tidy --- .clang-format | 13 + .gitignore | 1 + scripts/memtest.py | 17 +- setup.py | 55 +- src/mwparserfromhell/__init__.py | 3 +- 
src/mwparserfromhell/definitions.py | 20 +- src/mwparserfromhell/nodes/__init__.py | 14 +- src/mwparserfromhell/nodes/_base.py | 2 + src/mwparserfromhell/nodes/argument.py | 1 + src/mwparserfromhell/nodes/comment.py | 1 + src/mwparserfromhell/nodes/external_link.py | 2 + src/mwparserfromhell/nodes/extras/attribute.py | 12 +- src/mwparserfromhell/nodes/extras/parameter.py | 4 +- src/mwparserfromhell/nodes/heading.py | 1 + src/mwparserfromhell/nodes/html_entity.py | 17 +- src/mwparserfromhell/nodes/tag.py | 40 +- src/mwparserfromhell/nodes/template.py | 13 +- src/mwparserfromhell/nodes/text.py | 1 + src/mwparserfromhell/nodes/wikilink.py | 1 + src/mwparserfromhell/parser/__init__.py | 5 + src/mwparserfromhell/parser/builder.py | 77 +- src/mwparserfromhell/parser/contexts.py | 93 +- src/mwparserfromhell/parser/ctokenizer/avl_tree.c | 1035 ++++++------ src/mwparserfromhell/parser/ctokenizer/avl_tree.h | 343 ++-- src/mwparserfromhell/parser/ctokenizer/common.h | 67 +- src/mwparserfromhell/parser/ctokenizer/contexts.h | 16 +- .../parser/ctokenizer/definitions.c | 54 +- .../parser/ctokenizer/definitions.h | 11 +- src/mwparserfromhell/parser/ctokenizer/tag_data.c | 34 +- src/mwparserfromhell/parser/ctokenizer/tag_data.h | 12 +- .../parser/ctokenizer/textbuffer.c | 71 +- .../parser/ctokenizer/textbuffer.h | 16 +- src/mwparserfromhell/parser/ctokenizer/tok_parse.c | 1651 ++++++++++++-------- src/mwparserfromhell/parser/ctokenizer/tok_parse.h | 7 +- .../parser/ctokenizer/tok_support.c | 201 ++- .../parser/ctokenizer/tok_support.h | 59 +- src/mwparserfromhell/parser/ctokenizer/tokenizer.c | 161 +- src/mwparserfromhell/parser/ctokenizer/tokenizer.h | 103 +- src/mwparserfromhell/parser/ctokenizer/tokens.c | 86 +- src/mwparserfromhell/parser/ctokenizer/tokens.h | 76 +- src/mwparserfromhell/parser/errors.py | 2 + src/mwparserfromhell/parser/tokenizer.py | 178 ++- src/mwparserfromhell/parser/tokens.py | 58 +- src/mwparserfromhell/smart_list/list_proxy.py | 8 +- src/mwparserfromhell/string_mixin.py | 7 +- src/mwparserfromhell/utils.py | 7 +- src/mwparserfromhell/wikicode.py | 79 +- tests/conftest.py | 24 +- tests/test_argument.py | 18 +- tests/test_attribute.py | 5 + tests/test_builder.py | 1063 +++++++++---- tests/test_comment.py | 5 + tests/test_docs.py | 18 +- tests/test_external_link.py | 19 +- tests/test_heading.py | 9 +- tests/test_html_entity.py | 9 + tests/test_parameter.py | 4 + tests/test_parser.py | 62 +- tests/test_smart_list.py | 29 +- tests/test_string_mixin.py | 115 +- tests/test_tag.py | 139 +- tests/test_template.py | 407 +++-- tests/test_text.py | 5 + tests/test_tokenizer.py | 40 +- tests/test_tokens.py | 14 +- tests/test_utils.py | 41 +- tests/test_wikicode.py | 207 ++- tests/test_wikilink.py | 18 +- 68 files changed, 4288 insertions(+), 2698 deletions(-) create mode 100644 .clang-format diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..96a12b2 --- /dev/null +++ b/.clang-format @@ -0,0 +1,13 @@ +BasedOnStyle: LLVM +AlignConsecutiveMacros: AcrossEmptyLines +AllowShortFunctionsOnASingleLine: Inline +AlwaysBreakAfterReturnType: TopLevelDefinitions +BinPackArguments: false +BinPackParameters: false +BreakBeforeBraces: Linux +ColumnLimit: 88 +IndentPPDirectives: AfterHash +IndentWidth: 4 +SpaceAfterCStyleCast: true +StatementMacros: + - PyObject_HEAD diff --git a/.gitignore b/.gitignore index 0a03112..b966a55 100644 --- a/.gitignore +++ b/.gitignore @@ -13,5 +13,6 @@ dist docs/_build scripts/*.log htmlcov/ +compile_commands.json .idea/ .pytest_cache/ diff --git 
a/scripts/memtest.py b/scripts/memtest.py index 6f0d1ab..d5e39e5 100644 --- a/scripts/memtest.py +++ b/scripts/memtest.py @@ -41,6 +41,7 @@ from mwparserfromhell.parser._tokenizer import CTokenizer LOOPS = 10000 + class Color: GRAY = "\x1b[30;1m" GREEN = "\x1b[92m" @@ -63,11 +64,11 @@ class MemoryTest: data = {"name": None, "label": None, "input": None, "output": None} for line in test.strip().splitlines(): if line.startswith("name:"): - data["name"] = line[len("name:"):].strip() + data["name"] = line[len("name:") :].strip() elif line.startswith("label:"): - data["label"] = line[len("label:"):].strip() + data["label"] = line[len("label:") :].strip() elif line.startswith("input:"): - raw = line[len("input:"):].strip() + raw = line[len("input:") :].strip() if raw[0] == '"' and raw[-1] == '"': raw = raw[1:-1] raw = raw.encode("raw_unicode_escape") @@ -81,7 +82,7 @@ class MemoryTest: def load_file(filename): with open(filename, "rU") as fp: text = fp.read() - name = path.split(filename)[1][:0-len(extension)] + name = path.split(filename)[1][: 0 - len(extension)] self._parse_file(name, text) root = path.split(path.dirname(path.abspath(__file__)))[0] @@ -119,8 +120,11 @@ class MemoryTest: tmpl = "{0}[{1:03}/{2}]{3} {4}: " for i, (name, text) in enumerate(self._tests, 1): - sys.stdout.write(tmpl.format(Color.GRAY, i, len(self._tests), - Color.RESET, name.ljust(width))) + sys.stdout.write( + tmpl.format( + Color.GRAY, i, len(self._tests), Color.RESET, name.ljust(width) + ) + ) sys.stdout.flush() parent, child = Pipe() p = Process(target=_runner, args=(text, child)) @@ -156,6 +160,7 @@ def _runner(text, child): child.send("OK") child.recv() + if __name__ == "__main__": setlocale(LC_ALL, "") MemoryTest().run() diff --git a/setup.py b/setup.py index 9842fb9..a8540d5 100644 --- a/setup.py +++ b/setup.py @@ -52,8 +52,10 @@ elif env_var is not None: # Remove the command line argument as it isn't understood by setuptools: -sys.argv = [arg for arg in sys.argv - if arg not in ("--without-extension", "--with-extension")] +sys.argv = [ + arg for arg in sys.argv if arg not in ("--without-extension", "--with-extension") +] + def build_ext_patched(self): try: @@ -63,33 +65,40 @@ def build_ext_patched(self): print("Falling back to pure Python mode.") del self.extensions[:] + if fallback: build_ext.run, build_ext_original = build_ext_patched, build_ext.run # Project-specific part begins here: -tokenizer = Extension("mwparserfromhell.parser._tokenizer", - sources=sorted(glob("src/mwparserfromhell/parser/ctokenizer/*.c")), - depends=sorted(glob("src/mwparserfromhell/parser/ctokenizer/*.h"))) +tokenizer = Extension( + "mwparserfromhell.parser._tokenizer", + sources=sorted(glob("src/mwparserfromhell/parser/ctokenizer/*.c")), + depends=sorted(glob("src/mwparserfromhell/parser/ctokenizer/*.h")), +) setup( - name = "mwparserfromhell", - packages = find_packages("src"), - package_dir = {"": "src"}, - ext_modules = [tokenizer] if use_extension else [], - setup_requires = ["pytest-runner"] if "test" in sys.argv or "pytest" in sys.argv else [], - tests_require = ["pytest"], - version = __version__, - python_requires = ">= 3.5", - author = "Ben Kurtovic", - author_email = "ben.kurtovic@gmail.com", - url = "https://github.com/earwig/mwparserfromhell", - description = "MWParserFromHell is a parser for MediaWiki wikicode.", - long_description = long_docs, - download_url = "https://github.com/earwig/mwparserfromhell/tarball/v{}".format(__version__), - keywords = "earwig mwparserfromhell wikipedia wiki mediawiki wikicode template 
parsing", - license = "MIT License", - classifiers = [ + name="mwparserfromhell", + packages=find_packages("src"), + package_dir={"": "src"}, + ext_modules=[tokenizer] if use_extension else [], + setup_requires=["pytest-runner"] + if "test" in sys.argv or "pytest" in sys.argv + else [], + tests_require=["pytest"], + version=__version__, + python_requires=">= 3.5", + author="Ben Kurtovic", + author_email="ben.kurtovic@gmail.com", + url="https://github.com/earwig/mwparserfromhell", + description="MWParserFromHell is a parser for MediaWiki wikicode.", + long_description=long_docs, + download_url="https://github.com/earwig/mwparserfromhell/tarball/v{}".format( + __version__ + ), + keywords="earwig mwparserfromhell wikipedia wiki mediawiki wikicode template parsing", + license="MIT License", + classifiers=[ "Development Status :: 4 - Beta", "Environment :: Console", "Intended Audience :: Developers", @@ -101,6 +110,6 @@ setup( "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", - "Topic :: Text Processing :: Markup" + "Topic :: Text Processing :: Markup", ], ) diff --git a/src/mwparserfromhell/__init__.py b/src/mwparserfromhell/__init__.py index dd4eb2b..6e65a7e 100644 --- a/src/mwparserfromhell/__init__.py +++ b/src/mwparserfromhell/__init__.py @@ -30,7 +30,6 @@ __license__ = "MIT License" __version__ = "0.7.dev0" __email__ = "ben.kurtovic@gmail.com" -from . import (definitions, nodes, parser, smart_list, string_mixin, - utils, wikicode) +from . import definitions, nodes, parser, smart_list, string_mixin, utils, wikicode parse = utils.parse_anything diff --git a/src/mwparserfromhell/definitions.py b/src/mwparserfromhell/definitions.py index c8d37cd..3258063 100644 --- a/src/mwparserfromhell/definitions.py +++ b/src/mwparserfromhell/definitions.py @@ -26,8 +26,14 @@ When updating this file, please also update the the C tokenizer version: - mwparserfromhell/parser/ctokenizer/definitions.h """ -__all__ = ["get_html_tag", "is_parsable", "is_visible", "is_single", - "is_single_only", "is_scheme"] +__all__ = [ + "get_html_tag", + "is_parsable", + "is_visible", + "is_single", + "is_single_only", + "is_scheme", +] URI_SCHEMES = { # [wikimedia/mediawiki.git]/includes/DefaultSettings.php @ 5c660de5d0 @@ -92,7 +98,7 @@ INVISIBLE_TAGS = [ "score", "section", "templatedata", - "timeline" + "timeline", ] # [wikimedia/mediawiki.git]/includes/parser/Sanitizer.php @ 95e17ee645 @@ -103,29 +109,35 @@ MARKUP_TO_HTML = { "#": "li", "*": "li", ";": "dt", - ":": "dd" + ":": "dd", } + def get_html_tag(markup): """Return the HTML tag associated with the given wiki-markup.""" return MARKUP_TO_HTML[markup] + def is_parsable(tag): """Return if the given *tag*'s contents should be passed to the parser.""" return tag.lower() not in PARSER_BLACKLIST + def is_visible(tag): """Return whether or not the given *tag* contains visible text.""" return tag.lower() not in INVISIBLE_TAGS + def is_single(tag): """Return whether or not the given *tag* can exist without a close tag.""" return tag.lower() in SINGLE + def is_single_only(tag): """Return whether or not the given *tag* must exist without a close tag.""" return tag.lower() in SINGLE_ONLY + def is_scheme(scheme, slashes=True): """Return whether *scheme* is valid for external links.""" scheme = scheme.lower() diff --git a/src/mwparserfromhell/nodes/__init__.py b/src/mwparserfromhell/nodes/__init__.py index 18a1780..2f333a9 100644 --- a/src/mwparserfromhell/nodes/__init__.py +++ 
b/src/mwparserfromhell/nodes/__init__.py @@ -39,5 +39,15 @@ from .tag import Tag from .template import Template from .wikilink import Wikilink -__all__ = ["Argument", "Comment", "ExternalLink", "HTMLEntity", "Heading", - "Node", "Tag", "Template", "Text", "Wikilink"] +__all__ = [ + "Argument", + "Comment", + "ExternalLink", + "HTMLEntity", + "Heading", + "Node", + "Tag", + "Template", + "Text", + "Wikilink", +] diff --git a/src/mwparserfromhell/nodes/_base.py b/src/mwparserfromhell/nodes/_base.py index e6b2a50..8ea36d3 100644 --- a/src/mwparserfromhell/nodes/_base.py +++ b/src/mwparserfromhell/nodes/_base.py @@ -22,6 +22,7 @@ from ..string_mixin import StringMixIn __all__ = ["Node"] + class Node(StringMixIn): """Represents the base Node type, demonstrating the methods to override. @@ -35,6 +36,7 @@ class Node(StringMixIn): :meth:`__showtree__` can be overridden to build a nice tree representation of the node, if desired, for :meth:`~.Wikicode.get_tree`. """ + def __str__(self): raise NotImplementedError() diff --git a/src/mwparserfromhell/nodes/argument.py b/src/mwparserfromhell/nodes/argument.py index 501788f..f590245 100644 --- a/src/mwparserfromhell/nodes/argument.py +++ b/src/mwparserfromhell/nodes/argument.py @@ -24,6 +24,7 @@ from ..utils import parse_anything __all__ = ["Argument"] + class Argument(Node): """Represents a template argument substitution, like ``{{{foo}}}``.""" diff --git a/src/mwparserfromhell/nodes/comment.py b/src/mwparserfromhell/nodes/comment.py index fd8a9cc..f2a6a6d 100644 --- a/src/mwparserfromhell/nodes/comment.py +++ b/src/mwparserfromhell/nodes/comment.py @@ -23,6 +23,7 @@ from ._base import Node __all__ = ["Comment"] + class Comment(Node): """Represents a hidden HTML comment, like ````.""" diff --git a/src/mwparserfromhell/nodes/external_link.py b/src/mwparserfromhell/nodes/external_link.py index 6dafe71..85f1fae 100644 --- a/src/mwparserfromhell/nodes/external_link.py +++ b/src/mwparserfromhell/nodes/external_link.py @@ -24,6 +24,7 @@ from ..utils import parse_anything __all__ = ["ExternalLink"] + class ExternalLink(Node): """Represents an external link, like ``[http://example.com/ Example]``.""" @@ -83,6 +84,7 @@ class ExternalLink(Node): def url(self, value): # pylint: disable=import-outside-toplevel from ..parser import contexts + self._url = parse_anything(value, contexts.EXT_LINK_URI) @title.setter diff --git a/src/mwparserfromhell/nodes/extras/attribute.py b/src/mwparserfromhell/nodes/extras/attribute.py index 9e7b7cd..fc3421a 100644 --- a/src/mwparserfromhell/nodes/extras/attribute.py +++ b/src/mwparserfromhell/nodes/extras/attribute.py @@ -24,6 +24,7 @@ from ...utils import parse_anything __all__ = ["Attribute"] + class Attribute(StringMixIn): """Represents an attribute of an HTML tag. @@ -32,8 +33,15 @@ class Attribute(StringMixIn): whose value is ``"foo"``. """ - def __init__(self, name, value=None, quotes='"', pad_first=" ", - pad_before_eq="", pad_after_eq=""): + def __init__( + self, + name, + value=None, + quotes='"', + pad_first=" ", + pad_before_eq="", + pad_after_eq="", + ): super().__init__() self.name = name self._quotes = None diff --git a/src/mwparserfromhell/nodes/extras/parameter.py b/src/mwparserfromhell/nodes/extras/parameter.py index 44fb3aa..6352f5f 100644 --- a/src/mwparserfromhell/nodes/extras/parameter.py +++ b/src/mwparserfromhell/nodes/extras/parameter.py @@ -25,6 +25,7 @@ from ...utils import parse_anything __all__ = ["Parameter"] + class Parameter(StringMixIn): """Represents a paramater of a template. 
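 
     For example (an illustrative doctest; the names used are from the
     package's public API, and the template text is made up):
 
         >>> import mwparserfromhell
         >>> tmpl = mwparserfromhell.parse("{{foo|bar|baz=qux}}").filter_templates()[0]
         >>> [param.showkey for param in tmpl.params]
         [False, True]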
@@ -77,6 +78,5 @@ class Parameter(StringMixIn): def showkey(self, newval): newval = bool(newval) if not newval and not self.can_hide_key(self.name): - raise ValueError("parameter key {!r} cannot be hidden".format( - self.name)) + raise ValueError("parameter key {!r} cannot be hidden".format(self.name)) self._showkey = newval diff --git a/src/mwparserfromhell/nodes/heading.py b/src/mwparserfromhell/nodes/heading.py index 77f2f68..b65cf29 100644 --- a/src/mwparserfromhell/nodes/heading.py +++ b/src/mwparserfromhell/nodes/heading.py @@ -24,6 +24,7 @@ from ..utils import parse_anything __all__ = ["Heading"] + class Heading(Node): """Represents a section heading in wikicode, like ``== Foo ==``.""" diff --git a/src/mwparserfromhell/nodes/html_entity.py b/src/mwparserfromhell/nodes/html_entity.py index fa3fa4d..2322e38 100644 --- a/src/mwparserfromhell/nodes/html_entity.py +++ b/src/mwparserfromhell/nodes/html_entity.py @@ -24,6 +24,7 @@ from ._base import Node __all__ = ["HTMLEntity"] + class HTMLEntity(Node): """Represents an HTML entity, like `` ``, either named or unnamed.""" @@ -101,19 +102,23 @@ class HTMLEntity(Node): except ValueError: if newval not in htmlentities.entitydefs: raise ValueError( - "entity value {!r} is not a valid name".format(newval)) from None + "entity value {!r} is not a valid name".format(newval) + ) from None self._named = True self._hexadecimal = False else: if intval < 0 or intval > 0x10FFFF: raise ValueError( - "entity value 0x{:x} is not in range(0x110000)".format(intval)) from None + "entity value 0x{:x} is not in range(0x110000)".format(intval) + ) from None self._named = False self._hexadecimal = True else: test = int(newval, 16 if self.hexadecimal else 10) if test < 0 or test > 0x10FFFF: - raise ValueError("entity value {} is not in range(0x110000)".format(test)) + raise ValueError( + "entity value {} is not in range(0x110000)".format(test) + ) self._named = False self._value = newval @@ -126,8 +131,10 @@ class HTMLEntity(Node): try: int(self.value, 16) except ValueError as exc: - raise ValueError("current entity value {!r} is not a valid " - "Unicode codepoint".format(self.value)) from exc + raise ValueError( + "current entity value {!r} is not a valid " + "Unicode codepoint".format(self.value) + ) from exc self._named = newval @hexadecimal.setter diff --git a/src/mwparserfromhell/nodes/tag.py b/src/mwparserfromhell/nodes/tag.py index eb59c5b..027b09c 100644 --- a/src/mwparserfromhell/nodes/tag.py +++ b/src/mwparserfromhell/nodes/tag.py @@ -26,13 +26,24 @@ from ..utils import parse_anything __all__ = ["Tag"] + class Tag(Node): """Represents an HTML-style tag in wikicode, like ````.""" - def __init__(self, tag, contents=None, attrs=None, wiki_markup=None, - self_closing=False, invalid=False, implicit=False, padding="", - closing_tag=None, wiki_style_separator=None, - closing_wiki_markup=None): + def __init__( + self, + tag, + contents=None, + attrs=None, + wiki_markup=None, + self_closing=False, + invalid=False, + implicit=False, + padding="", + closing_tag=None, + wiki_style_separator=None, + closing_wiki_markup=None, + ): super().__init__() self.tag = tag self.contents = contents @@ -60,8 +71,14 @@ class Tag(Node): if self.self_closing: return self.wiki_markup + attrs + padding + separator close = self.closing_wiki_markup or "" - return self.wiki_markup + attrs + padding + separator + \ - str(self.contents) + close + return ( + self.wiki_markup + + attrs + + padding + + separator + + str(self.contents) + + close + ) result = (" * Slight changes for 
compatibility by Ben Kurtovic @@ -19,7 +19,7 @@ */ #define false 0 -#define true 1 +#define true 1 typedef int bool; @@ -32,21 +32,24 @@ typedef int bool; static AVL_INLINE struct avl_tree_node * avl_get_child(const struct avl_tree_node *parent, int sign) { - if (sign < 0) - return parent->left; - else - return parent->right; + if (sign < 0) { + return parent->left; + } else { + return parent->right; + } } static AVL_INLINE struct avl_tree_node * avl_tree_first_or_last_in_order(const struct avl_tree_node *root, int sign) { - const struct avl_tree_node *first = root; - - if (first) - while (avl_get_child(first, +sign)) - first = avl_get_child(first, +sign); - return (struct avl_tree_node *)first; + const struct avl_tree_node *first = root; + + if (first) { + while (avl_get_child(first, +sign)) { + first = avl_get_child(first, +sign); + } + } + return (struct avl_tree_node *) first; } /* Starts an in-order traversal of the tree: returns the least-valued node, or @@ -54,7 +57,7 @@ avl_tree_first_or_last_in_order(const struct avl_tree_node *root, int sign) struct avl_tree_node * avl_tree_first_in_order(const struct avl_tree_node *root) { - return avl_tree_first_or_last_in_order(root, -1); + return avl_tree_first_or_last_in_order(root, -1); } /* Starts a *reverse* in-order traversal of the tree: returns the @@ -62,25 +65,24 @@ avl_tree_first_in_order(const struct avl_tree_node *root) struct avl_tree_node * avl_tree_last_in_order(const struct avl_tree_node *root) { - return avl_tree_first_or_last_in_order(root, 1); + return avl_tree_first_or_last_in_order(root, 1); } static AVL_INLINE struct avl_tree_node * avl_tree_next_or_prev_in_order(const struct avl_tree_node *node, int sign) { - const struct avl_tree_node *next; - - if (avl_get_child(node, +sign)) - for (next = avl_get_child(node, +sign); - avl_get_child(next, -sign); - next = avl_get_child(next, -sign)) - ; - else - for (next = avl_get_parent(node); - next && node == avl_get_child(next, +sign); - node = next, next = avl_get_parent(next)) - ; - return (struct avl_tree_node *)next; + const struct avl_tree_node *next; + + if (avl_get_child(node, +sign)) { + for (next = avl_get_child(node, +sign); avl_get_child(next, -sign); + next = avl_get_child(next, -sign)) { + } + } else { + for (next = avl_get_parent(node); next && node == avl_get_child(next, +sign); + node = next, next = avl_get_parent(next)) { + } + } + return (struct avl_tree_node *) next; } /* Continues an in-order traversal of the tree: returns the next-greatest-valued @@ -88,7 +90,7 @@ avl_tree_next_or_prev_in_order(const struct avl_tree_node *node, int sign) struct avl_tree_node * avl_tree_next_in_order(const struct avl_tree_node *node) { - return avl_tree_next_or_prev_in_order(node, 1); + return avl_tree_next_or_prev_in_order(node, 1); } /* Continues a *reverse* in-order traversal of the tree: returns the @@ -96,20 +98,22 @@ avl_tree_next_in_order(const struct avl_tree_node *node) struct avl_tree_node * avl_tree_prev_in_order(const struct avl_tree_node *node) { - return avl_tree_next_or_prev_in_order(node, -1); + return avl_tree_next_or_prev_in_order(node, -1); } /* Starts a postorder traversal of the tree. */ struct avl_tree_node * avl_tree_first_in_postorder(const struct avl_tree_node *root) { - const struct avl_tree_node *first = root; + const struct avl_tree_node *first = root; - if (first) - while (first->left || first->right) - first = first->left ? first->left : first->right; + if (first) { + while (first->left || first->right) { + first = first->left ? 
first->left : first->right; + } + } - return (struct avl_tree_node *)first; + return (struct avl_tree_node *) first; } /* Continues a postorder traversal of the tree. @prev will not be deferenced as @@ -118,16 +122,16 @@ avl_tree_first_in_postorder(const struct avl_tree_node *root) * root of the tree). */ struct avl_tree_node * avl_tree_next_in_postorder(const struct avl_tree_node *prev, - const struct avl_tree_node *prev_parent) + const struct avl_tree_node *prev_parent) { - const struct avl_tree_node *next = prev_parent; - - if (next && prev == next->left && next->right) - for (next = next->right; - next->left || next->right; - next = next->left ? next->left : next->right) - ; - return (struct avl_tree_node *)next; + const struct avl_tree_node *next = prev_parent; + + if (next && prev == next->left && next->right) { + for (next = next->right; next->left || next->right; + next = next->left ? next->left : next->right) { + } + } + return (struct avl_tree_node *) next; } /* Sets the left child (sign < 0) or the right child (sign > 0) of the @@ -135,28 +139,29 @@ avl_tree_next_in_postorder(const struct avl_tree_node *prev, * Note: for all calls of this, 'sign' is constant at compilation time, * so the compiler can remove the conditional. */ static AVL_INLINE void -avl_set_child(struct avl_tree_node *parent, int sign, - struct avl_tree_node *child) +avl_set_child(struct avl_tree_node *parent, int sign, struct avl_tree_node *child) { - if (sign < 0) - parent->left = child; - else - parent->right = child; + if (sign < 0) { + parent->left = child; + } else { + parent->right = child; + } } /* Sets the parent and balance factor of the specified AVL tree node. */ static AVL_INLINE void -avl_set_parent_balance(struct avl_tree_node *node, struct avl_tree_node *parent, - int balance_factor) +avl_set_parent_balance(struct avl_tree_node *node, + struct avl_tree_node *parent, + int balance_factor) { - node->parent_balance = (uintptr_t)parent | (balance_factor + 1); + node->parent_balance = (uintptr_t) parent | (balance_factor + 1); } /* Sets the parent of the specified AVL tree node. */ static AVL_INLINE void avl_set_parent(struct avl_tree_node *node, struct avl_tree_node *parent) { - node->parent_balance = (uintptr_t)parent | (node->parent_balance & 3); + node->parent_balance = (uintptr_t) parent | (node->parent_balance & 3); } /* Returns the balance factor of the specified AVL tree node --- that is, the @@ -164,7 +169,7 @@ avl_set_parent(struct avl_tree_node *node, struct avl_tree_node *parent) static AVL_INLINE int avl_get_balance_factor(const struct avl_tree_node *node) { - return (int)(node->parent_balance & 3) - 1; + return (int) (node->parent_balance & 3) - 1; } /* Adds @amount to the balance factor of the specified AVL tree node. 
@@ -173,23 +178,24 @@ avl_get_balance_factor(const struct avl_tree_node *node) static AVL_INLINE void avl_adjust_balance_factor(struct avl_tree_node *node, int amount) { - node->parent_balance += amount; + node->parent_balance += amount; } static AVL_INLINE void avl_replace_child(struct avl_tree_node **root_ptr, - struct avl_tree_node *parent, - struct avl_tree_node *old_child, - struct avl_tree_node *new_child) + struct avl_tree_node *parent, + struct avl_tree_node *old_child, + struct avl_tree_node *new_child) { - if (parent) { - if (old_child == parent->left) - parent->left = new_child; - else - parent->right = new_child; - } else { - *root_ptr = new_child; - } + if (parent) { + if (old_child == parent->left) { + parent->left = new_child; + } else { + parent->right = new_child; + } + } else { + *root_ptr = new_child; + } } /* @@ -220,30 +226,32 @@ avl_replace_child(struct avl_tree_node **root_ptr, * This updates pointers but not balance factors! */ static AVL_INLINE void -avl_rotate(struct avl_tree_node ** const root_ptr, - struct avl_tree_node * const A, const int sign) +avl_rotate(struct avl_tree_node **const root_ptr, + struct avl_tree_node *const A, + const int sign) { - struct avl_tree_node * const B = avl_get_child(A, -sign); - struct avl_tree_node * const E = avl_get_child(B, +sign); - struct avl_tree_node * const P = avl_get_parent(A); + struct avl_tree_node *const B = avl_get_child(A, -sign); + struct avl_tree_node *const E = avl_get_child(B, +sign); + struct avl_tree_node *const P = avl_get_parent(A); - avl_set_child(A, -sign, E); - avl_set_parent(A, B); + avl_set_child(A, -sign, E); + avl_set_parent(A, B); - avl_set_child(B, +sign, A); - avl_set_parent(B, P); + avl_set_child(B, +sign, A); + avl_set_parent(B, P); - if (E) - avl_set_parent(E, A); + if (E) { + avl_set_parent(E, A); + } - avl_replace_child(root_ptr, P, A, B); + avl_replace_child(root_ptr, P, A, B); } /* * Template for performing a double rotation --- * * sign > 0: Rotate counterclockwise (left) rooted at B, then - * clockwise (right) rooted at A: + * clockwise (right) rooted at A: * * P? P? P? * | | | @@ -258,7 +266,7 @@ avl_rotate(struct avl_tree_node ** const root_ptr, * (nodes marked with ? may not exist) * * sign < 0: Rotate clockwise (right) rooted at B, then - * counterclockwise (left) rooted at A: + * counterclockwise (left) rooted at A: * * P? P? P? * | | | @@ -272,59 +280,62 @@ avl_rotate(struct avl_tree_node ** const root_ptr, * * Returns a pointer to E and updates balance factors. Except for those * two things, this function is equivalent to: - * avl_rotate(root_ptr, B, -sign); - * avl_rotate(root_ptr, A, +sign); + * avl_rotate(root_ptr, B, -sign); + * avl_rotate(root_ptr, A, +sign); * * See comment in avl_handle_subtree_growth() for explanation of balance * factor updates. 
*/ static AVL_INLINE struct avl_tree_node * -avl_do_double_rotate(struct avl_tree_node ** const root_ptr, - struct avl_tree_node * const B, - struct avl_tree_node * const A, const int sign) +avl_do_double_rotate(struct avl_tree_node **const root_ptr, + struct avl_tree_node *const B, + struct avl_tree_node *const A, + const int sign) { - struct avl_tree_node * const E = avl_get_child(B, +sign); - struct avl_tree_node * const F = avl_get_child(E, -sign); - struct avl_tree_node * const G = avl_get_child(E, +sign); - struct avl_tree_node * const P = avl_get_parent(A); - const int e = avl_get_balance_factor(E); + struct avl_tree_node *const E = avl_get_child(B, +sign); + struct avl_tree_node *const F = avl_get_child(E, -sign); + struct avl_tree_node *const G = avl_get_child(E, +sign); + struct avl_tree_node *const P = avl_get_parent(A); + const int e = avl_get_balance_factor(E); - avl_set_child(A, -sign, G); - avl_set_parent_balance(A, E, ((sign * e >= 0) ? 0 : -e)); + avl_set_child(A, -sign, G); + avl_set_parent_balance(A, E, ((sign * e >= 0) ? 0 : -e)); - avl_set_child(B, +sign, F); - avl_set_parent_balance(B, E, ((sign * e <= 0) ? 0 : -e)); + avl_set_child(B, +sign, F); + avl_set_parent_balance(B, E, ((sign * e <= 0) ? 0 : -e)); - avl_set_child(E, +sign, A); - avl_set_child(E, -sign, B); - avl_set_parent_balance(E, P, 0); + avl_set_child(E, +sign, A); + avl_set_child(E, -sign, B); + avl_set_parent_balance(E, P, 0); - if (G) - avl_set_parent(G, A); + if (G) { + avl_set_parent(G, A); + } - if (F) - avl_set_parent(F, B); + if (F) { + avl_set_parent(F, B); + } - avl_replace_child(root_ptr, P, A, E); + avl_replace_child(root_ptr, P, A, E); - return E; + return E; } /* * This function handles the growth of a subtree due to an insertion. * * @root_ptr - * Location of the tree's root pointer. + * Location of the tree's root pointer. * * @node - * A subtree that has increased in height by 1 due to an insertion. + * A subtree that has increased in height by 1 due to an insertion. * * @parent - * Parent of @node; must not be NULL. + * Parent of @node; must not be NULL. * * @sign - * -1 if @node is the left child of @parent; - * +1 if @node is the right child of @parent. + * -1 if @node is the left child of @parent; + * +1 if @node is the right child of @parent. * * This function will adjust @parent's balance factor, then do a (single * or double) rotation if necessary. The return value will be %true if @@ -337,202 +348,205 @@ avl_do_double_rotate(struct avl_tree_node ** const root_ptr, * (single or double) rotation be done. */ static AVL_INLINE bool -avl_handle_subtree_growth(struct avl_tree_node ** const root_ptr, - struct avl_tree_node * const node, - struct avl_tree_node * const parent, - const int sign) +avl_handle_subtree_growth(struct avl_tree_node **const root_ptr, + struct avl_tree_node *const node, + struct avl_tree_node *const parent, + const int sign) { - int old_balance_factor, new_balance_factor; - - old_balance_factor = avl_get_balance_factor(parent); - - if (old_balance_factor == 0) { - avl_adjust_balance_factor(parent, sign); - /* @parent is still sufficiently balanced (-1 or +1 - * balance factor), but must have increased in height. - * Continue up the tree. */ - return false; - } - - new_balance_factor = old_balance_factor + sign; - - if (new_balance_factor == 0) { - avl_adjust_balance_factor(parent, sign); - /* @parent is now perfectly balanced (0 balance factor). - * It cannot have increased in height, so there is - * nothing more to do. 
*/ - return true; - } - - /* @parent is too left-heavy (new_balance_factor == -2) or - * too right-heavy (new_balance_factor == +2). */ - - /* Test whether @node is left-heavy (-1 balance factor) or - * right-heavy (+1 balance factor). - * Note that it cannot be perfectly balanced (0 balance factor) - * because here we are under the invariant that @node has - * increased in height due to the insertion. */ - if (sign * avl_get_balance_factor(node) > 0) { - - /* @node (B below) is heavy in the same direction @parent - * (A below) is heavy. - * - * @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - * The comment, diagram, and equations below assume sign < 0. - * The other case is symmetric! - * @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - * - * Do a clockwise rotation rooted at @parent (A below): - * - * A B - * / \ / \ - * B C? => D A - * / \ / \ / \ - * D E? F? G?E? C? - * / \ - * F? G? - * - * Before the rotation: - * balance(A) = -2 - * balance(B) = -1 - * Let x = height(C). Then: - * height(B) = x + 2 - * height(D) = x + 1 - * height(E) = x - * max(height(F), height(G)) = x. - * - * After the rotation: - * height(D) = max(height(F), height(G)) + 1 - * = x + 1 - * height(A) = max(height(E), height(C)) + 1 - * = max(x, x) + 1 = x + 1 - * balance(B) = 0 - * balance(A) = 0 - */ - avl_rotate(root_ptr, parent, -sign); - - /* Equivalent to setting @parent's balance factor to 0. */ - avl_adjust_balance_factor(parent, -sign); /* A */ - - /* Equivalent to setting @node's balance factor to 0. */ - avl_adjust_balance_factor(node, -sign); /* B */ - } else { - /* @node (B below) is heavy in the direction opposite - * from the direction @parent (A below) is heavy. - * - * @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - * The comment, diagram, and equations below assume sign < 0. - * The other case is symmetric! - * @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - * - * Do a counterblockwise rotation rooted at @node (B below), - * then a clockwise rotation rooted at @parent (A below): - * - * A A E - * / \ / \ / \ - * B C? => E C? => B A - * / \ / \ / \ / \ - * D? E B G? D? F?G? C? - * / \ / \ - * F? G? D? F? - * - * Before the rotation: - * balance(A) = -2 - * balance(B) = +1 - * Let x = height(C). Then: - * height(B) = x + 2 - * height(E) = x + 1 - * height(D) = x - * max(height(F), height(G)) = x - * - * After both rotations: - * height(A) = max(height(G), height(C)) + 1 - * = x + 1 - * balance(A) = balance(E{orig}) >= 0 ? 0 : -balance(E{orig}) - * height(B) = max(height(D), height(F)) + 1 - * = x + 1 - * balance(B) = balance(E{orig} <= 0) ? 0 : -balance(E{orig}) - * - * height(E) = x + 2 - * balance(E) = 0 - */ - avl_do_double_rotate(root_ptr, node, parent, -sign); - } - - /* Height after rotation is unchanged; nothing more to do. */ - return true; + int old_balance_factor, new_balance_factor; + + old_balance_factor = avl_get_balance_factor(parent); + + if (old_balance_factor == 0) { + avl_adjust_balance_factor(parent, sign); + /* @parent is still sufficiently balanced (-1 or +1 + * balance factor), but must have increased in height. + * Continue up the tree. */ + return false; + } + + new_balance_factor = old_balance_factor + sign; + + if (new_balance_factor == 0) { + avl_adjust_balance_factor(parent, sign); + /* @parent is now perfectly balanced (0 balance factor). + * It cannot have increased in height, so there is + * nothing more to do. 
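+         * (Example: if @parent's old balance factor was -1 and the
+         * insertion grew its right subtree, i.e. sign = +1, then the
+         * new factor is 0: the shorter side merely caught up, so the
+         * subtree's height, set by the taller left side, is unchanged.)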
*/ + return true; + } + + /* @parent is too left-heavy (new_balance_factor == -2) or + * too right-heavy (new_balance_factor == +2). */ + + /* Test whether @node is left-heavy (-1 balance factor) or + * right-heavy (+1 balance factor). + * Note that it cannot be perfectly balanced (0 balance factor) + * because here we are under the invariant that @node has + * increased in height due to the insertion. */ + if (sign * avl_get_balance_factor(node) > 0) { + + /* @node (B below) is heavy in the same direction @parent + * (A below) is heavy. + * + * @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + * The comment, diagram, and equations below assume sign < 0. + * The other case is symmetric! + * @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + * + * Do a clockwise rotation rooted at @parent (A below): + * + * A B + * / \ / \ + * B C? => D A + * / \ / \ / \ + * D E? F? G?E? C? + * / \ + * F? G? + * + * Before the rotation: + * balance(A) = -2 + * balance(B) = -1 + * Let x = height(C). Then: + * height(B) = x + 2 + * height(D) = x + 1 + * height(E) = x + * max(height(F), height(G)) = x. + * + * After the rotation: + * height(D) = max(height(F), height(G)) + 1 + * = x + 1 + * height(A) = max(height(E), height(C)) + 1 + * = max(x, x) + 1 = x + 1 + * balance(B) = 0 + * balance(A) = 0 + */ + avl_rotate(root_ptr, parent, -sign); + + /* Equivalent to setting @parent's balance factor to 0. */ + avl_adjust_balance_factor(parent, -sign); /* A */ + + /* Equivalent to setting @node's balance factor to 0. */ + avl_adjust_balance_factor(node, -sign); /* B */ + } else { + /* @node (B below) is heavy in the direction opposite + * from the direction @parent (A below) is heavy. + * + * @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + * The comment, diagram, and equations below assume sign < 0. + * The other case is symmetric! + * @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + * + * Do a counterblockwise rotation rooted at @node (B below), + * then a clockwise rotation rooted at @parent (A below): + * + * A A E + * / \ / \ / \ + * B C? => E C? => B A + * / \ / \ / \ / \ + * D? E B G? D? F?G? C? + * / \ / \ + * F? G? D? F? + * + * Before the rotation: + * balance(A) = -2 + * balance(B) = +1 + * Let x = height(C). Then: + * height(B) = x + 2 + * height(E) = x + 1 + * height(D) = x + * max(height(F), height(G)) = x + * + * After both rotations: + * height(A) = max(height(G), height(C)) + 1 + * = x + 1 + * balance(A) = balance(E{orig}) >= 0 ? 0 : -balance(E{orig}) + * height(B) = max(height(D), height(F)) + 1 + * = x + 1 + * balance(B) = balance(E{orig} <= 0) ? 0 : -balance(E{orig}) + * + * height(E) = x + 2 + * balance(E) = 0 + */ + avl_do_double_rotate(root_ptr, node, parent, -sign); + } + + /* Height after rotation is unchanged; nothing more to do. */ + return true; } /* Rebalance the tree after insertion of the specified node. */ void avl_tree_rebalance_after_insert(struct avl_tree_node **root_ptr, - struct avl_tree_node *inserted) + struct avl_tree_node *inserted) { - struct avl_tree_node *node, *parent; - bool done; - - inserted->left = NULL; - inserted->right = NULL; - - node = inserted; - - /* Adjust balance factor of new node's parent. - * No rotation will need to be done at this level. */ - - parent = avl_get_parent(node); - if (!parent) - return; - - if (node == parent->left) - avl_adjust_balance_factor(parent, -1); - else - avl_adjust_balance_factor(parent, +1); - - if (avl_get_balance_factor(parent) == 0) - /* @parent did not change in height. Nothing more to do. 
*/ - return; - - /* The subtree rooted at @parent increased in height by 1. */ - - do { - /* Adjust balance factor of next ancestor. */ - - node = parent; - parent = avl_get_parent(node); - if (!parent) - return; - - /* The subtree rooted at @node has increased in height by 1. */ - if (node == parent->left) - done = avl_handle_subtree_growth(root_ptr, node, - parent, -1); - else - done = avl_handle_subtree_growth(root_ptr, node, - parent, +1); - } while (!done); + struct avl_tree_node *node, *parent; + bool done; + + inserted->left = NULL; + inserted->right = NULL; + + node = inserted; + + /* Adjust balance factor of new node's parent. + * No rotation will need to be done at this level. */ + + parent = avl_get_parent(node); + if (!parent) { + return; + } + + if (node == parent->left) { + avl_adjust_balance_factor(parent, -1); + } else { + avl_adjust_balance_factor(parent, +1); + } + + if (avl_get_balance_factor(parent) == 0) { + /* @parent did not change in height. Nothing more to do. */ + return; + } + + /* The subtree rooted at @parent increased in height by 1. */ + + do { + /* Adjust balance factor of next ancestor. */ + + node = parent; + parent = avl_get_parent(node); + if (!parent) { + return; + } + + /* The subtree rooted at @node has increased in height by 1. */ + if (node == parent->left) { + done = avl_handle_subtree_growth(root_ptr, node, parent, -1); + } else { + done = avl_handle_subtree_growth(root_ptr, node, parent, +1); + } + } while (!done); } /* * This function handles the shrinkage of a subtree due to a deletion. * * @root_ptr - * Location of the tree's root pointer. + * Location of the tree's root pointer. * * @parent - * A node in the tree, exactly one of whose subtrees has decreased - * in height by 1 due to a deletion. (This includes the case where - * one of the child pointers has become NULL, since we can consider - * the "NULL" subtree to have a height of 0.) + * A node in the tree, exactly one of whose subtrees has decreased + * in height by 1 due to a deletion. (This includes the case where + * one of the child pointers has become NULL, since we can consider + * the "NULL" subtree to have a height of 0.) * * @sign - * +1 if the left subtree of @parent has decreased in height by 1; - * -1 if the right subtree of @parent has decreased in height by 1. + * +1 if the left subtree of @parent has decreased in height by 1; + * -1 if the right subtree of @parent has decreased in height by 1. * * @left_deleted_ret - * If the return value is not NULL, this will be set to %true if the - * left subtree of the returned node has decreased in height by 1, - * or %false if the right subtree of the returned node has decreased - * in height by 1. + * If the return value is not NULL, this will be set to %true if the + * left subtree of the returned node has decreased in height by 1, + * or %false if the right subtree of the returned node has decreased + * in height by 1. * * This function will adjust @parent's balance factor, then do a (single * or double) rotation if necessary. The return value will be NULL if @@ -542,114 +556,114 @@ avl_tree_rebalance_after_insert(struct avl_tree_node **root_ptr, * will be set. 
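  *
  * (A worked instance of the above: if @parent's balance factor was 0
  * and its left subtree shrinks, then sign = +1 and the new factor is
  * +1; the taller right subtree still determines the height, so no
  * rotation is needed and NULL is returned to stop the traversal.)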
*/ static AVL_INLINE struct avl_tree_node * -avl_handle_subtree_shrink(struct avl_tree_node ** const root_ptr, - struct avl_tree_node *parent, - const int sign, - bool * const left_deleted_ret) +avl_handle_subtree_shrink(struct avl_tree_node **const root_ptr, + struct avl_tree_node *parent, + const int sign, + bool *const left_deleted_ret) { - struct avl_tree_node *node; - int old_balance_factor, new_balance_factor; - - old_balance_factor = avl_get_balance_factor(parent); - - if (old_balance_factor == 0) { - /* Prior to the deletion, the subtree rooted at - * @parent was perfectly balanced. It's now - * unbalanced by 1, but that's okay and its height - * hasn't changed. Nothing more to do. */ - avl_adjust_balance_factor(parent, sign); - return NULL; - } - - new_balance_factor = old_balance_factor + sign; - - if (new_balance_factor == 0) { - /* The subtree rooted at @parent is now perfectly - * balanced, whereas before the deletion it was - * unbalanced by 1. Its height must have decreased - * by 1. No rotation is needed at this location, - * but continue up the tree. */ - avl_adjust_balance_factor(parent, sign); - node = parent; - } else { - /* @parent is too left-heavy (new_balance_factor == -2) or - * too right-heavy (new_balance_factor == +2). */ - - node = avl_get_child(parent, sign); - - /* The rotations below are similar to those done during - * insertion (see avl_handle_subtree_growth()), so full - * comments are not provided. The only new case is the - * one where @node has a balance factor of 0, and that is - * commented. */ - - if (sign * avl_get_balance_factor(node) >= 0) { - - avl_rotate(root_ptr, parent, -sign); - - if (avl_get_balance_factor(node) == 0) { - /* - * @node (B below) is perfectly balanced. - * - * @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - * The comment, diagram, and equations - * below assume sign < 0. The other case - * is symmetric! - * @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - * - * Do a clockwise rotation rooted at - * @parent (A below): - * - * A B - * / \ / \ - * B C? => D A - * / \ / \ / \ - * D E F? G?E C? - * / \ - * F? G? - * - * Before the rotation: - * balance(A) = -2 - * balance(B) = 0 - * Let x = height(C). Then: - * height(B) = x + 2 - * height(D) = x + 1 - * height(E) = x + 1 - * max(height(F), height(G)) = x. - * - * After the rotation: - * height(D) = max(height(F), height(G)) + 1 - * = x + 1 - * height(A) = max(height(E), height(C)) + 1 - * = max(x + 1, x) + 1 = x + 2 - * balance(A) = -1 - * balance(B) = +1 - */ - - /* A: -2 => -1 (sign < 0) - * or +2 => +1 (sign > 0) - * No change needed --- that's the same as - * old_balance_factor. */ - - /* B: 0 => +1 (sign < 0) - * or 0 => -1 (sign > 0) */ - avl_adjust_balance_factor(node, -sign); - - /* Height is unchanged; nothing more to do. */ - return NULL; - } else { - avl_adjust_balance_factor(parent, -sign); - avl_adjust_balance_factor(node, -sign); - } - } else { - node = avl_do_double_rotate(root_ptr, node, - parent, -sign); - } - } - parent = avl_get_parent(node); - if (parent) - *left_deleted_ret = (node == parent->left); - return parent; + struct avl_tree_node *node; + int old_balance_factor, new_balance_factor; + + old_balance_factor = avl_get_balance_factor(parent); + + if (old_balance_factor == 0) { + /* Prior to the deletion, the subtree rooted at + * @parent was perfectly balanced. It's now + * unbalanced by 1, but that's okay and its height + * hasn't changed. Nothing more to do. 
*/ + avl_adjust_balance_factor(parent, sign); + return NULL; + } + + new_balance_factor = old_balance_factor + sign; + + if (new_balance_factor == 0) { + /* The subtree rooted at @parent is now perfectly + * balanced, whereas before the deletion it was + * unbalanced by 1. Its height must have decreased + * by 1. No rotation is needed at this location, + * but continue up the tree. */ + avl_adjust_balance_factor(parent, sign); + node = parent; + } else { + /* @parent is too left-heavy (new_balance_factor == -2) or + * too right-heavy (new_balance_factor == +2). */ + + node = avl_get_child(parent, sign); + + /* The rotations below are similar to those done during + * insertion (see avl_handle_subtree_growth()), so full + * comments are not provided. The only new case is the + * one where @node has a balance factor of 0, and that is + * commented. */ + + if (sign * avl_get_balance_factor(node) >= 0) { + + avl_rotate(root_ptr, parent, -sign); + + if (avl_get_balance_factor(node) == 0) { + /* + * @node (B below) is perfectly balanced. + * + * @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + * The comment, diagram, and equations + * below assume sign < 0. The other case + * is symmetric! + * @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + * + * Do a clockwise rotation rooted at + * @parent (A below): + * + * A B + * / \ / \ + * B C? => D A + * / \ / \ / \ + * D E F? G?E C? + * / \ + * F? G? + * + * Before the rotation: + * balance(A) = -2 + * balance(B) = 0 + * Let x = height(C). Then: + * height(B) = x + 2 + * height(D) = x + 1 + * height(E) = x + 1 + * max(height(F), height(G)) = x. + * + * After the rotation: + * height(D) = max(height(F), height(G)) + 1 + * = x + 1 + * height(A) = max(height(E), height(C)) + 1 + * = max(x + 1, x) + 1 = x + 2 + * balance(A) = -1 + * balance(B) = +1 + */ + + /* A: -2 => -1 (sign < 0) + * or +2 => +1 (sign > 0) + * No change needed --- that's the same as + * old_balance_factor. */ + + /* B: 0 => +1 (sign < 0) + * or 0 => -1 (sign > 0) */ + avl_adjust_balance_factor(node, -sign); + + /* Height is unchanged; nothing more to do. */ + return NULL; + } else { + avl_adjust_balance_factor(parent, -sign); + avl_adjust_balance_factor(node, -sign); + } + } else { + node = avl_do_double_rotate(root_ptr, node, parent, -sign); + } + } + parent = avl_get_parent(node); + if (parent) { + *left_deleted_ret = (node == parent->left); + } + return parent; } /* Swaps node X, which must have 2 children, with its in-order successor, then @@ -657,80 +671,81 @@ avl_handle_subtree_shrink(struct avl_tree_node ** const root_ptr, * balance factor having been updated to account for the unlink. */ static AVL_INLINE struct avl_tree_node * avl_tree_swap_with_successor(struct avl_tree_node **root_ptr, - struct avl_tree_node *X, - bool *left_deleted_ret) + struct avl_tree_node *X, + bool *left_deleted_ret) { - struct avl_tree_node *Y, *ret; - - Y = X->right; - if (!Y->left) { - /* - * P? P? P? - * | | | - * X Y Y - * / \ / \ / \ - * A Y => A X => A B? - * / \ / \ - * (0) B? (0) B? - * - * [ X unlinked, Y returned ] - */ - ret = Y; - *left_deleted_ret = false; - } else { - struct avl_tree_node *Q; - - do { - Q = Y; - Y = Y->left; - } while (Y->left); - - /* - * P? P? P? - * | | | - * X Y Y - * / \ / \ / \ - * A ... => A ... => A ... - * | | | - * Q Q Q - * / / / - * Y X B? - * / \ / \ - * (0) B? (0) B? 
- * - * - * [ X unlinked, Q returned ] - */ - - Q->left = Y->right; - if (Q->left) - avl_set_parent(Q->left, Q); - Y->right = X->right; - avl_set_parent(X->right, Y); - ret = Q; - *left_deleted_ret = true; - } - - Y->left = X->left; - avl_set_parent(X->left, Y); - - Y->parent_balance = X->parent_balance; - avl_replace_child(root_ptr, avl_get_parent(X), X, Y); - - return ret; + struct avl_tree_node *Y, *ret; + + Y = X->right; + if (!Y->left) { + /* + * P? P? P? + * | | | + * X Y Y + * / \ / \ / \ + * A Y => A X => A B? + * / \ / \ + * (0) B? (0) B? + * + * [ X unlinked, Y returned ] + */ + ret = Y; + *left_deleted_ret = false; + } else { + struct avl_tree_node *Q; + + do { + Q = Y; + Y = Y->left; + } while (Y->left); + + /* + * P? P? P? + * | | | + * X Y Y + * / \ / \ / \ + * A ... => A ... => A ... + * | | | + * Q Q Q + * / / / + * Y X B? + * / \ / \ + * (0) B? (0) B? + * + * + * [ X unlinked, Q returned ] + */ + + Q->left = Y->right; + if (Q->left) { + avl_set_parent(Q->left, Q); + } + Y->right = X->right; + avl_set_parent(X->right, Y); + ret = Q; + *left_deleted_ret = true; + } + + Y->left = X->left; + avl_set_parent(X->left, Y); + + Y->parent_balance = X->parent_balance; + avl_replace_child(root_ptr, avl_get_parent(X), X, Y); + + return ret; } /* * Removes an item from the specified AVL tree. * * @root_ptr - * Location of the AVL tree's root pointer. Indirection is needed - * because the root node may change if the tree needed to be rebalanced - * because of the deletion or if @node was the root node. + * Location of the AVL tree's root pointer. Indirection is needed + * because the root node may change if the tree needed to be rebalanced + * because of the deletion or if @node was the root node. * * @node - * Pointer to the `struct avl_tree_node' embedded in the item to - * remove from the tree. + * Pointer to the `struct avl_tree_node' embedded in the item to + * remove from the tree. * * Note: This function *only* removes the node and rebalances the tree. * It does not free any memory, nor does it do the equivalent of @@ -739,57 +754,57 @@ avl_tree_swap_with_successor(struct avl_tree_node **root_ptr, void avl_tree_remove(struct avl_tree_node **root_ptr, struct avl_tree_node *node) { - struct avl_tree_node *parent; - bool left_deleted = false; - - if (node->left && node->right) { - /* @node is fully internal, with two children. Swap it - * with its in-order successor (which must exist in the - * right subtree of @node and can have, at most, a right - * child), then unlink @node. */ - parent = avl_tree_swap_with_successor(root_ptr, node, - &left_deleted); - /* @parent is now the parent of what was @node's in-order - * successor. It cannot be NULL, since @node itself was - * an ancestor of its in-order successor. - * @left_deleted has been set to %true if @node's - * in-order successor was the left child of @parent, - * otherwise %false. */ - } else { - struct avl_tree_node *child; - - /* @node is missing at least one child. Unlink it. Set - * @parent to @node's parent, and set @left_deleted to - * reflect which child of @parent @node was. Or, if - * @node was the root node, simply update the root node - * and return. */ - child = node->left ? 
node->left : node->right; - parent = avl_get_parent(node); - if (parent) { - if (node == parent->left) { - parent->left = child; - left_deleted = true; - } else { - parent->right = child; - left_deleted = false; - } - if (child) - avl_set_parent(child, parent); - } else { - if (child) - avl_set_parent(child, parent); - *root_ptr = child; - return; - } - } - - /* Rebalance the tree. */ - do { - if (left_deleted) - parent = avl_handle_subtree_shrink(root_ptr, parent, - +1, &left_deleted); - else - parent = avl_handle_subtree_shrink(root_ptr, parent, - -1, &left_deleted); - } while (parent); + struct avl_tree_node *parent; + bool left_deleted = false; + + if (node->left && node->right) { + /* @node is fully internal, with two children. Swap it + * with its in-order successor (which must exist in the + * right subtree of @node and can have, at most, a right + * child), then unlink @node. */ + parent = avl_tree_swap_with_successor(root_ptr, node, &left_deleted); + /* @parent is now the parent of what was @node's in-order + * successor. It cannot be NULL, since @node itself was + * an ancestor of its in-order successor. + * @left_deleted has been set to %true if @node's + * in-order successor was the left child of @parent, + * otherwise %false. */ + } else { + struct avl_tree_node *child; + + /* @node is missing at least one child. Unlink it. Set + * @parent to @node's parent, and set @left_deleted to + * reflect which child of @parent @node was. Or, if + * @node was the root node, simply update the root node + * and return. */ + child = node->left ? node->left : node->right; + parent = avl_get_parent(node); + if (parent) { + if (node == parent->left) { + parent->left = child; + left_deleted = true; + } else { + parent->right = child; + left_deleted = false; + } + if (child) { + avl_set_parent(child, parent); + } + } else { + if (child) { + avl_set_parent(child, parent); + } + *root_ptr = child; + return; + } + } + + /* Rebalance the tree. */ + do { + if (left_deleted) { + parent = avl_handle_subtree_shrink(root_ptr, parent, +1, &left_deleted); + } else { + parent = avl_handle_subtree_shrink(root_ptr, parent, -1, &left_deleted); + } + } while (parent); } diff --git a/src/mwparserfromhell/parser/ctokenizer/avl_tree.h b/src/mwparserfromhell/parser/ctokenizer/avl_tree.h index 9caa2bc..477e256 100644 --- a/src/mwparserfromhell/parser/ctokenizer/avl_tree.h +++ b/src/mwparserfromhell/parser/ctokenizer/avl_tree.h @@ -1,6 +1,6 @@ /* * avl_tree.h - intrusive, nonrecursive AVL tree data structure (self-balancing - * binary search tree), header file + * binary search tree), header file * * Written in 2014-2016 by Eric Biggers * Slight changes for compatibility by Ben Kurtovic @@ -24,60 +24,60 @@ #include #if !defined(_MSC_VER) || (_MSC_VER >= 1600) -#include +# include #endif #ifdef __GNUC__ -# define AVL_INLINE inline __attribute__((always_inline)) +# define AVL_INLINE inline __attribute__((always_inline)) #elif defined(_MSC_VER) && (_MSC_VER < 1900) -# define AVL_INLINE __inline +# define AVL_INLINE __inline #else -# define AVL_INLINE inline +# define AVL_INLINE inline #endif /* Node in an AVL tree. Embed this in some other data structure. */ struct avl_tree_node { - /* Pointer to left child or NULL */ - struct avl_tree_node *left; + /* Pointer to left child or NULL */ + struct avl_tree_node *left; - /* Pointer to right child or NULL */ - struct avl_tree_node *right; + /* Pointer to right child or NULL */ + struct avl_tree_node *right; - /* Pointer to parent combined with the balance factor. 
This saves 4 or - * 8 bytes of memory depending on the CPU architecture. - * - * Low 2 bits: One greater than the balance factor of this subtree, - * which is equal to height(right) - height(left). The mapping is: - * - * 00 => -1 - * 01 => 0 - * 10 => +1 - * 11 => undefined - * - * The rest of the bits are the pointer to the parent node. It must be - * 4-byte aligned, and it will be NULL if this is the root node and - * therefore has no parent. */ - uintptr_t parent_balance; + /* Pointer to parent combined with the balance factor. This saves 4 or + * 8 bytes of memory depending on the CPU architecture. + * + * Low 2 bits: One greater than the balance factor of this subtree, + * which is equal to height(right) - height(left). The mapping is: + * + * 00 => -1 + * 01 => 0 + * 10 => +1 + * 11 => undefined + * + * The rest of the bits are the pointer to the parent node. It must be + * 4-byte aligned, and it will be NULL if this is the root node and + * therefore has no parent. */ + uintptr_t parent_balance; }; /* Cast an AVL tree node to the containing data structure. */ -#define avl_tree_entry(entry, type, member) \ - ((type*) ((char *)(entry) - offsetof(type, member))) +#define avl_tree_entry(entry, type, member) \ + ((type *) ((char *) (entry) -offsetof(type, member))) /* Returns a pointer to the parent of the specified AVL tree node, or NULL if it * is already the root of the tree. */ static AVL_INLINE struct avl_tree_node * avl_get_parent(const struct avl_tree_node *node) { - return (struct avl_tree_node *)(node->parent_balance & ~3); + return (struct avl_tree_node *) (node->parent_balance & ~3); } /* Marks the specified AVL tree node as unlinked from any tree. */ static AVL_INLINE void avl_tree_node_set_unlinked(struct avl_tree_node *node) { - node->parent_balance = (uintptr_t)node; + node->parent_balance = (uintptr_t) node; } /* Returns true iff the specified AVL tree node has been marked with @@ -86,30 +86,29 @@ avl_tree_node_set_unlinked(struct avl_tree_node *node) static AVL_INLINE int avl_tree_node_is_unlinked(const struct avl_tree_node *node) { - return node->parent_balance == (uintptr_t)node; + return node->parent_balance == (uintptr_t) node; } /* (Internal use only) */ -extern void -avl_tree_rebalance_after_insert(struct avl_tree_node **root_ptr, - struct avl_tree_node *inserted); +extern void avl_tree_rebalance_after_insert(struct avl_tree_node **root_ptr, + struct avl_tree_node *inserted); /* * Looks up an item in the specified AVL tree. * * @root - * Pointer to the root of the AVL tree. (This can be NULL --- that just - * means the tree is empty.) + * Pointer to the root of the AVL tree. (This can be NULL --- that just + * means the tree is empty.) * * @cmp_ctx - * First argument to pass to the comparison callback. This generally - * should be a pointer to an object equal to the one being searched for. + * First argument to pass to the comparison callback. This generally + * should be a pointer to an object equal to the one being searched for. * * @cmp - * Comparison callback. Must return < 0, 0, or > 0 if the first argument - * is less than, equal to, or greater than the second argument, - * respectively. The first argument will be @cmp_ctx and the second - * argument will be a pointer to the AVL tree node of an item in the tree. + * Comparison callback. Must return < 0, 0, or > 0 if the first argument + * is less than, equal to, or greater than the second argument, + * respectively. 
The first argument will be @cmp_ctx and the second + * argument will be a pointer to the AVL tree node of an item in the tree. * * Returns a pointer to the AVL tree node of the resulting item, or NULL if the * item was not found. @@ -117,48 +116,49 @@ avl_tree_rebalance_after_insert(struct avl_tree_node **root_ptr, * Example: * * struct int_wrapper { - * int data; - * struct avl_tree_node index_node; + * int data; + * struct avl_tree_node index_node; * }; * * static int _avl_cmp_int_to_node(const void *intptr, - * const struct avl_tree_node *nodeptr) + * const struct avl_tree_node *nodeptr) * { - * int n1 = *(const int *)intptr; - * int n2 = avl_tree_entry(nodeptr, struct int_wrapper, index_node)->data; - * if (n1 < n2) - * return -1; - * else if (n1 > n2) - * return 1; - * else - * return 0; + * int n1 = *(const int *)intptr; + * int n2 = avl_tree_entry(nodeptr, struct int_wrapper, index_node)->data; + * if (n1 < n2) + * return -1; + * else if (n1 > n2) + * return 1; + * else + * return 0; * } * * bool contains_int(struct avl_tree_node *root, int n) * { - * struct avl_tree_node *result; + * struct avl_tree_node *result; * - * result = avl_tree_lookup(root, &n, _avl_cmp_int_to_node); - * return result ? true : false; + * result = avl_tree_lookup(root, &n, _avl_cmp_int_to_node); + * return result ? true : false; * } */ static AVL_INLINE struct avl_tree_node * avl_tree_lookup(const struct avl_tree_node *root, - const void *cmp_ctx, - int (*cmp)(const void *, const struct avl_tree_node *)) + const void *cmp_ctx, + int (*cmp)(const void *, const struct avl_tree_node *)) { - const struct avl_tree_node *cur = root; + const struct avl_tree_node *cur = root; - while (cur) { - int res = (*cmp)(cmp_ctx, cur); - if (res < 0) - cur = cur->left; - else if (res > 0) - cur = cur->right; - else - break; - } - return (struct avl_tree_node*)cur; + while (cur) { + int res = (*cmp)(cmp_ctx, cur); + if (res < 0) { + cur = cur->left; + } else if (res > 0) { + cur = cur->right; + } else { + break; + } + } + return (struct avl_tree_node *) cur; } /* Same as avl_tree_lookup(), but uses a more specific type for the comparison @@ -167,44 +167,45 @@ avl_tree_lookup(const struct avl_tree_node *root, * embedded 'struct avl_tree_node'. */ static AVL_INLINE struct avl_tree_node * avl_tree_lookup_node(const struct avl_tree_node *root, - const struct avl_tree_node *node, - int (*cmp)(const struct avl_tree_node *, - const struct avl_tree_node *)) + const struct avl_tree_node *node, + int (*cmp)(const struct avl_tree_node *, + const struct avl_tree_node *)) { - const struct avl_tree_node *cur = root; + const struct avl_tree_node *cur = root; - while (cur) { - int res = (*cmp)(node, cur); - if (res < 0) - cur = cur->left; - else if (res > 0) - cur = cur->right; - else - break; - } - return (struct avl_tree_node*)cur; + while (cur) { + int res = (*cmp)(node, cur); + if (res < 0) { + cur = cur->left; + } else if (res > 0) { + cur = cur->right; + } else { + break; + } + } + return (struct avl_tree_node *) cur; } /* * Inserts an item into the specified AVL tree. * * @root_ptr - * Location of the AVL tree's root pointer. Indirection is needed because - * the root node may change as a result of rotations caused by the - * insertion. Initialize *root_ptr to NULL for an empty tree. + * Location of the AVL tree's root pointer. Indirection is needed because + * the root node may change as a result of rotations caused by the + * insertion. Initialize *root_ptr to NULL for an empty tree. 
* * @item - * Pointer to the `struct avl_tree_node' embedded in the item to insert. - * No members in it need be pre-initialized, although members in the - * containing structure should be pre-initialized so that @cmp can use them - * in comparisons. + * Pointer to the `struct avl_tree_node' embedded in the item to insert. + * No members in it need be pre-initialized, although members in the + * containing structure should be pre-initialized so that @cmp can use them + * in comparisons. * * @cmp - * Comparison callback. Must return < 0, 0, or > 0 if the first argument - * is less than, equal to, or greater than the second argument, - * respectively. The first argument will be @item and the second - * argument will be a pointer to an AVL tree node embedded in some - * previously-inserted item to which @item is being compared. + * Comparison callback. Must return < 0, 0, or > 0 if the first argument + * is less than, equal to, or greater than the second argument, + * respectively. The first argument will be @item and the second + * argument will be a pointer to an AVL tree node embedded in some + * previously-inserted item to which @item is being compared. * * If no item in the tree is comparatively equal (via @cmp) to @item, inserts * @item and returns NULL. Otherwise does nothing and returns a pointer to the @@ -214,150 +215,138 @@ avl_tree_lookup_node(const struct avl_tree_node *root, * Example: * * struct int_wrapper { - * int data; - * struct avl_tree_node index_node; + * int data; + * struct avl_tree_node index_node; * }; * * #define GET_DATA(i) avl_tree_entry((i), struct int_wrapper, index_node)->data * * static int _avl_cmp_ints(const struct avl_tree_node *node1, - * const struct avl_tree_node *node2) + * const struct avl_tree_node *node2) * { - * int n1 = GET_DATA(node1); - * int n2 = GET_DATA(node2); - * if (n1 < n2) - * return -1; - * else if (n1 > n2) - * return 1; - * else - * return 0; + * int n1 = GET_DATA(node1); + * int n2 = GET_DATA(node2); + * if (n1 < n2) + * return -1; + * else if (n1 > n2) + * return 1; + * else + * return 0; * } * * bool insert_int(struct avl_tree_node **root_ptr, int data) * { - * struct int_wrapper *i = malloc(sizeof(struct int_wrapper)); - * i->data = data; - * if (avl_tree_insert(root_ptr, &i->index_node, _avl_cmp_ints)) { - * // Duplicate. - * free(i); - * return false; - * } - * return true; + * struct int_wrapper *i = malloc(sizeof(struct int_wrapper)); + * i->data = data; + * if (avl_tree_insert(root_ptr, &i->index_node, _avl_cmp_ints)) { + * // Duplicate. 
+ * free(i); + * return false; + * } + * return true; * } */ static AVL_INLINE struct avl_tree_node * avl_tree_insert(struct avl_tree_node **root_ptr, - struct avl_tree_node *item, - int (*cmp)(const struct avl_tree_node *, - const struct avl_tree_node *)) + struct avl_tree_node *item, + int (*cmp)(const struct avl_tree_node *, const struct avl_tree_node *)) { - struct avl_tree_node **cur_ptr = root_ptr, *cur = NULL; - int res; + struct avl_tree_node **cur_ptr = root_ptr, *cur = NULL; + int res; - while (*cur_ptr) { - cur = *cur_ptr; - res = (*cmp)(item, cur); - if (res < 0) - cur_ptr = &cur->left; - else if (res > 0) - cur_ptr = &cur->right; - else - return cur; - } - *cur_ptr = item; - item->parent_balance = (uintptr_t)cur | 1; - avl_tree_rebalance_after_insert(root_ptr, item); - return NULL; + while (*cur_ptr) { + cur = *cur_ptr; + res = (*cmp)(item, cur); + if (res < 0) { + cur_ptr = &cur->left; + } else if (res > 0) { + cur_ptr = &cur->right; + } else { + return cur; + } + } + *cur_ptr = item; + item->parent_balance = (uintptr_t) cur | 1; + avl_tree_rebalance_after_insert(root_ptr, item); + return NULL; } /* Removes an item from the specified AVL tree. * See implementation for details. */ -extern void -avl_tree_remove(struct avl_tree_node **root_ptr, struct avl_tree_node *node); +extern void avl_tree_remove(struct avl_tree_node **root_ptr, + struct avl_tree_node *node); /* Nonrecursive AVL tree traversal functions */ -extern struct avl_tree_node * -avl_tree_first_in_order(const struct avl_tree_node *root); +extern struct avl_tree_node *avl_tree_first_in_order(const struct avl_tree_node *root); -extern struct avl_tree_node * -avl_tree_last_in_order(const struct avl_tree_node *root); +extern struct avl_tree_node *avl_tree_last_in_order(const struct avl_tree_node *root); -extern struct avl_tree_node * -avl_tree_next_in_order(const struct avl_tree_node *node); +extern struct avl_tree_node *avl_tree_next_in_order(const struct avl_tree_node *node); -extern struct avl_tree_node * -avl_tree_prev_in_order(const struct avl_tree_node *node); +extern struct avl_tree_node *avl_tree_prev_in_order(const struct avl_tree_node *node); extern struct avl_tree_node * avl_tree_first_in_postorder(const struct avl_tree_node *root); extern struct avl_tree_node * avl_tree_next_in_postorder(const struct avl_tree_node *prev, - const struct avl_tree_node *prev_parent); + const struct avl_tree_node *prev_parent); /* * Iterate through the nodes in an AVL tree in sorted order. * You may not modify the tree during the iteration. * * @child_struct - * Variable that will receive a pointer to each struct inserted into the - * tree. + * Variable that will receive a pointer to each struct inserted into the + * tree. * @root - * Root of the AVL tree. + * Root of the AVL tree. * @struct_name - * Type of *child_struct. + * Type of *child_struct. * @struct_member - * Member of @struct_name type that is the AVL tree node. + * Member of @struct_name type that is the AVL tree node. 
* * Example: * * struct int_wrapper { - * int data; - * struct avl_tree_node index_node; + * int data; + * struct avl_tree_node index_node; * }; * * void print_ints(struct avl_tree_node *root) * { - * struct int_wrapper *i; + * struct int_wrapper *i; * - * avl_tree_for_each_in_order(i, root, struct int_wrapper, index_node) - * printf("%d\n", i->data); + * avl_tree_for_each_in_order(i, root, struct int_wrapper, index_node) + * printf("%d\n", i->data); * } */ -#define avl_tree_for_each_in_order(child_struct, root, \ - struct_name, struct_member) \ - for (struct avl_tree_node *_cur = \ - avl_tree_first_in_order(root); \ - _cur && ((child_struct) = \ - avl_tree_entry(_cur, struct_name, \ - struct_member), 1); \ - _cur = avl_tree_next_in_order(_cur)) +#define avl_tree_for_each_in_order(child_struct, root, struct_name, struct_member) \ + for (struct avl_tree_node *_cur = avl_tree_first_in_order(root); \ + _cur && \ + ((child_struct) = avl_tree_entry(_cur, struct_name, struct_member), 1); \ + _cur = avl_tree_next_in_order(_cur)) /* * Like avl_tree_for_each_in_order(), but uses the reverse order. */ -#define avl_tree_for_each_in_reverse_order(child_struct, root, \ - struct_name, struct_member) \ - for (struct avl_tree_node *_cur = \ - avl_tree_last_in_order(root); \ - _cur && ((child_struct) = \ - avl_tree_entry(_cur, struct_name, \ - struct_member), 1); \ - _cur = avl_tree_prev_in_order(_cur)) +#define avl_tree_for_each_in_reverse_order( \ + child_struct, root, struct_name, struct_member) \ + for (struct avl_tree_node *_cur = avl_tree_last_in_order(root); \ + _cur && \ + ((child_struct) = avl_tree_entry(_cur, struct_name, struct_member), 1); \ + _cur = avl_tree_prev_in_order(_cur)) /* * Like avl_tree_for_each_in_order(), but iterates through the nodes in * postorder, so the current node may be deleted or freed. */ -#define avl_tree_for_each_in_postorder(child_struct, root, \ - struct_name, struct_member) \ - for (struct avl_tree_node *_cur = \ - avl_tree_first_in_postorder(root), *_parent; \ - _cur && ((child_struct) = \ - avl_tree_entry(_cur, struct_name, \ - struct_member), 1) \ - && (_parent = avl_get_parent(_cur), 1); \ - _cur = avl_tree_next_in_postorder(_cur, _parent)) +#define avl_tree_for_each_in_postorder(child_struct, root, struct_name, struct_member) \ + for (struct avl_tree_node *_cur = avl_tree_first_in_postorder(root), *_parent; \ + _cur && \ + ((child_struct) = avl_tree_entry(_cur, struct_name, struct_member), 1) && \ + (_parent = avl_get_parent(_cur), 1); \ + _cur = avl_tree_next_in_postorder(_cur, _parent)) #endif /* _AVL_TREE_H_ */ diff --git a/src/mwparserfromhell/parser/ctokenizer/common.h b/src/mwparserfromhell/parser/ctokenizer/common.h index 22a6b81..b3e9cec 100644 --- a/src/mwparserfromhell/parser/ctokenizer/common.h +++ b/src/mwparserfromhell/parser/ctokenizer/common.h @@ -23,55 +23,56 @@ SOFTWARE. 
#pragma once #ifndef PY_SSIZE_T_CLEAN -#define PY_SSIZE_T_CLEAN // See: https://docs.python.org/3/c-api/arg.html +# define PY_SSIZE_T_CLEAN // See: https://docs.python.org/3/c-api/arg.html #endif #include -#include #include +#include #include "avl_tree.h" /* Compatibility macros */ #ifndef uint64_t -#define uint64_t unsigned PY_LONG_LONG +# define uint64_t unsigned PY_LONG_LONG #endif -#define malloc PyObject_Malloc // XXX: yuck +#define malloc PyObject_Malloc // XXX: yuck #define realloc PyObject_Realloc #define free PyObject_Free /* Unicode support macros */ -#define PyUnicode_FROM_SINGLE(chr) \ +#define PyUnicode_FROM_SINGLE(chr) \ PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, &(chr), 1) /* Error handling macros */ -#define BAD_ROUTE self->route_state -#define BAD_ROUTE_CONTEXT self->route_context -#define FAIL_ROUTE(context) { \ - self->route_state = 1; \ - self->route_context = context; \ - } -#define RESET_ROUTE() self->route_state = 0 +#define BAD_ROUTE self->route_state +#define BAD_ROUTE_CONTEXT self->route_context +#define FAIL_ROUTE(context) \ + do { \ + self->route_state = 1; \ + self->route_context = context; \ + } while (0) +#define RESET_ROUTE() self->route_state = 0 /* Shared globals */ -extern char** entitydefs; +extern char **entitydefs; -extern PyObject* NOARGS; -extern PyObject* definitions; +extern PyObject *NOARGS; +extern PyObject *definitions; /* Structs */ typedef struct { Py_ssize_t capacity; Py_ssize_t length; - PyObject* object; + PyObject *object; int kind; - void* data; + void *data; } Textbuffer; typedef struct { @@ -80,19 +81,19 @@ typedef struct { } StackIdent; struct Stack { - PyObject* stack; + PyObject *stack; uint64_t context; - Textbuffer* textbuffer; + Textbuffer *textbuffer; StackIdent ident; - struct Stack* next; + struct Stack *next; }; typedef struct Stack Stack; typedef struct { - PyObject* object; /* base PyUnicodeObject object */ - Py_ssize_t length; /* length of object, in code points */ - int kind; /* object's kind value */ - void* data; /* object's raw unicode buffer */ + PyObject *object; /* base PyUnicodeObject object */ + Py_ssize_t length; /* length of object, in code points */ + int kind; /* object's kind value */ + void *data; /* object's raw unicode buffer */ } TokenizerInput; typedef struct avl_tree_node avl_tree; @@ -104,13 +105,13 @@ typedef struct { typedef struct { PyObject_HEAD - TokenizerInput text; /* text to tokenize */ - Stack* topstack; /* topmost stack */ - Py_ssize_t head; /* current position in text */ - int global; /* global context */ - int depth; /* stack recursion depth */ - int route_state; /* whether a BadRoute has been triggered */ - uint64_t route_context; /* context when the last BadRoute was triggered */ - avl_tree* bad_routes; /* stack idents for routes known to fail */ - int skip_style_tags; /* temp fix for the sometimes broken tag parser */ + TokenizerInput text; /* text to tokenize */ + Stack *topstack; /* topmost stack */ + Py_ssize_t head; /* current position in text */ + int global; /* global context */ + int depth; /* stack recursion depth */ + int route_state; /* whether a BadRoute has been triggered */ + uint64_t route_context; /* context when the last BadRoute was triggered */ + avl_tree *bad_routes; /* stack idents for routes known to fail */ + int skip_style_tags; /* temp fix for the sometimes broken tag parser */ } Tokenizer; diff --git a/src/mwparserfromhell/parser/ctokenizer/contexts.h b/src/mwparserfromhell/parser/ctokenizer/contexts.h index 2696925..e20e67d 100644 --- 
a/src/mwparserfromhell/parser/ctokenizer/contexts.h +++ b/src/mwparserfromhell/parser/ctokenizer/contexts.h @@ -89,11 +89,17 @@ SOFTWARE. /* Aggregate contexts */ -#define AGG_FAIL (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_HEADING | LC_TAG | LC_STYLE | LC_TABLE_OPEN) -#define AGG_UNSAFE (LC_TEMPLATE_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_TITLE | LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME) -#define AGG_DOUBLE (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE | LC_TABLE_ROW_OPEN) -#define AGG_NO_WIKILINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_URI) -#define AGG_NO_EXT_LINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK) +#define AGG_FAIL \ + (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_HEADING | \ + LC_TAG | LC_STYLE | LC_TABLE_OPEN) +#define AGG_UNSAFE \ + (LC_TEMPLATE_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_TITLE | \ + LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME) +#define AGG_DOUBLE (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE | LC_TABLE_ROW_OPEN) +#define AGG_NO_WIKILINKS \ + (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_URI) +#define AGG_NO_EXT_LINKS \ + (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK) /* Tag contexts */ diff --git a/src/mwparserfromhell/parser/ctokenizer/definitions.c b/src/mwparserfromhell/parser/ctokenizer/definitions.c index 323d8a1..06612e4 100644 --- a/src/mwparserfromhell/parser/ctokenizer/definitions.c +++ b/src/mwparserfromhell/parser/ctokenizer/definitions.c @@ -27,7 +27,8 @@ SOFTWARE. See the Python version for data sources. */ -static const char* URI_SCHEMES[] = { +// clang-format off +static const char *URI_SCHEMES[] = { "bitcoin", "ftp", "ftps", @@ -55,10 +56,10 @@ static const char* URI_SCHEMES[] = { "urn", "worldwind", "xmpp", - NULL, + NULL, }; -static const char* URI_SCHEMES_AUTHORITY_OPTIONAL[] = { +static const char *URI_SCHEMES_AUTHORITY_OPTIONAL[] = { "bitcoin", "geo", "magnet", @@ -73,7 +74,7 @@ static const char* URI_SCHEMES_AUTHORITY_OPTIONAL[] = { NULL, }; -static const char* PARSER_BLACKLIST[] = { +static const char *PARSER_BLACKLIST[] = { "categorytree", "ce", "chem", @@ -93,32 +94,32 @@ static const char* PARSER_BLACKLIST[] = { "timeline", NULL, }; +// clang-format on -static const char* SINGLE[] = { - "br", "wbr", "hr", "meta", "link", "img", "li", "dt", "dd", "th", "td", - "tr", NULL -}; +static const char *SINGLE[] = { + "br", "wbr", "hr", "meta", "link", "img", "li", "dt", "dd", "th", "td", "tr", NULL}; -static const char* SINGLE_ONLY[] = { - "br", "wbr", "hr", "meta", "link", "img", NULL -}; +static const char *SINGLE_ONLY[] = {"br", "wbr", "hr", "meta", "link", "img", NULL}; /* Convert a PyUnicodeObject to a lowercase ASCII char* array and store it in the second argument. The caller must free the return value when finished. If the return value is NULL, the conversion failed and *string is not set. 
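
    A typical call pattern (a sketch; this is what unicode_in_string_list()
    below does):

        const char *string;
        PyObject *temp = unicode_to_lcase_ascii(input, &string);
        if (!temp)
            return 0;
        /* ... compare against string ... */
        Py_DECREF(temp);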
*/ -static PyObject* unicode_to_lcase_ascii(PyObject *input, const char **string) +static PyObject * +unicode_to_lcase_ascii(PyObject *input, const char **string) { PyObject *lower = PyObject_CallMethod(input, "lower", NULL), *bytes; - if (!lower) + if (!lower) { return NULL; + } bytes = PyUnicode_AsASCIIString(lower); Py_DECREF(lower); if (!bytes) { - if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) + if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) { PyErr_Clear(); + } return NULL; } *string = PyBytes_AS_STRING(bytes); @@ -128,14 +129,16 @@ static PyObject* unicode_to_lcase_ascii(PyObject *input, const char **string) /* Return whether a PyUnicodeObject is in a list of lowercase ASCII strings. */ -static int unicode_in_string_list(PyObject *input, const char **list) +static int +unicode_in_string_list(PyObject *input, const char **list) { const char *string; PyObject *temp = unicode_to_lcase_ascii(input, &string); int retval = 0; - if (!temp) + if (!temp) { return 0; + } while (*list) { if (!strcmp(*(list++), string)) { @@ -144,7 +147,7 @@ static int unicode_in_string_list(PyObject *input, const char **list) } } - end: +end: Py_DECREF(temp); return retval; } @@ -152,7 +155,8 @@ static int unicode_in_string_list(PyObject *input, const char **list) /* Return if the given tag's contents should be passed to the parser. */ -int is_parsable(PyObject *tag) +int +is_parsable(PyObject *tag) { return !unicode_in_string_list(tag, PARSER_BLACKLIST); } @@ -160,7 +164,8 @@ int is_parsable(PyObject *tag) /* Return whether or not the given tag can exist without a close tag. */ -int is_single(PyObject *tag) +int +is_single(PyObject *tag) { return unicode_in_string_list(tag, SINGLE); } @@ -168,7 +173,8 @@ int is_single(PyObject *tag) /* Return whether or not the given tag must exist without a close tag. */ -int is_single_only(PyObject *tag) +int +is_single_only(PyObject *tag) { return unicode_in_string_list(tag, SINGLE_ONLY); } @@ -176,10 +182,12 @@ int is_single_only(PyObject *tag) /* Return whether the given scheme is valid for external links. */ -int is_scheme(PyObject *scheme, int slashes) +int +is_scheme(PyObject *scheme, int slashes) { - if (slashes) + if (slashes) { return unicode_in_string_list(scheme, URI_SCHEMES); - else + } else { return unicode_in_string_list(scheme, URI_SCHEMES_AUTHORITY_OPTIONAL); + } } diff --git a/src/mwparserfromhell/parser/ctokenizer/definitions.h b/src/mwparserfromhell/parser/ctokenizer/definitions.h index 1ae1d09..bb4c657 100644 --- a/src/mwparserfromhell/parser/ctokenizer/definitions.h +++ b/src/mwparserfromhell/parser/ctokenizer/definitions.h @@ -28,12 +28,11 @@ SOFTWARE. /* Functions */ -int is_parsable(PyObject*); -int is_single(PyObject*); -int is_single_only(PyObject*); -int is_scheme(PyObject*, int); +int is_parsable(PyObject *); +int is_single(PyObject *); +int is_single_only(PyObject *); +int is_scheme(PyObject *, int); /* Macros */ -#define GET_HTML_TAG(markup) \ - (markup == ':' ? "dd" : markup == ';' ? "dt" : "li") +#define GET_HTML_TAG(markup) (markup == ':' ? "dd" : markup == ';' ? "dt" : "li") diff --git a/src/mwparserfromhell/parser/ctokenizer/tag_data.c b/src/mwparserfromhell/parser/ctokenizer/tag_data.c index 1b73533..f4e1464 100644 --- a/src/mwparserfromhell/parser/ctokenizer/tag_data.c +++ b/src/mwparserfromhell/parser/ctokenizer/tag_data.c @@ -26,13 +26,14 @@ SOFTWARE. /* Initialize a new TagData object. 
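
    A minimal usage sketch (hypothetical caller; "self" stands for the
    enclosing Tokenizer, as elsewhere in this codebase):

        TagData *data = TagData_new(&self->text);
        if (!data)
            return -1;  /* allocation failed; an exception should be set */
        /* ... use data->context, data->pad_first, ... */
        TagData_dealloc(data);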
*/ -TagData* TagData_new(TokenizerInput* text) +TagData * +TagData_new(TokenizerInput *text) { -#define ALLOC_BUFFER(name) \ - name = Textbuffer_new(text); \ - if (!name) { \ - TagData_dealloc(self); \ - return NULL; \ +#define ALLOC_BUFFER(name) \ + name = Textbuffer_new(text); \ + if (!name) { \ + TagData_dealloc(self); \ + return NULL; \ } TagData *self = malloc(sizeof(TagData)); @@ -54,25 +55,30 @@ TagData* TagData_new(TokenizerInput* text) /* Deallocate the given TagData object. */ -void TagData_dealloc(TagData* self) +void +TagData_dealloc(TagData *self) { - if (self->pad_first) + if (self->pad_first) { Textbuffer_dealloc(self->pad_first); - if (self->pad_before_eq) + } + if (self->pad_before_eq) { Textbuffer_dealloc(self->pad_before_eq); - if (self->pad_after_eq) + } + if (self->pad_after_eq) { Textbuffer_dealloc(self->pad_after_eq); + } free(self); } /* Clear the internal buffers of the given TagData object. */ -int TagData_reset_buffers(TagData* self) +int +TagData_reset_buffers(TagData *self) { - if (Textbuffer_reset(self->pad_first) || - Textbuffer_reset(self->pad_before_eq) || - Textbuffer_reset(self->pad_after_eq)) + if (Textbuffer_reset(self->pad_first) || Textbuffer_reset(self->pad_before_eq) || + Textbuffer_reset(self->pad_after_eq)) { return -1; + } return 0; } diff --git a/src/mwparserfromhell/parser/ctokenizer/tag_data.h b/src/mwparserfromhell/parser/ctokenizer/tag_data.h index 7e8edcb..70db867 100644 --- a/src/mwparserfromhell/parser/ctokenizer/tag_data.h +++ b/src/mwparserfromhell/parser/ctokenizer/tag_data.h @@ -29,15 +29,15 @@ SOFTWARE. typedef struct { uint64_t context; - Textbuffer* pad_first; - Textbuffer* pad_before_eq; - Textbuffer* pad_after_eq; + Textbuffer *pad_first; + Textbuffer *pad_before_eq; + Textbuffer *pad_after_eq; Py_UCS4 quoter; Py_ssize_t reset; } TagData; /* Functions */ -TagData* TagData_new(TokenizerInput*); -void TagData_dealloc(TagData*); -int TagData_reset_buffers(TagData*); +TagData *TagData_new(TokenizerInput *); +void TagData_dealloc(TagData *); +int TagData_reset_buffers(TagData *); diff --git a/src/mwparserfromhell/parser/ctokenizer/textbuffer.c b/src/mwparserfromhell/parser/ctokenizer/textbuffer.c index e37b7c3..4591f91 100644 --- a/src/mwparserfromhell/parser/ctokenizer/textbuffer.c +++ b/src/mwparserfromhell/parser/ctokenizer/textbuffer.c @@ -23,20 +23,22 @@ SOFTWARE. #include "textbuffer.h" #define INITIAL_CAPACITY 32 -#define RESIZE_FACTOR 2 -#define CONCAT_EXTRA 32 +#define RESIZE_FACTOR 2 +#define CONCAT_EXTRA 32 /* Internal allocation function for textbuffers. */ -static int internal_alloc(Textbuffer* self, Py_UCS4 maxchar) +static int +internal_alloc(Textbuffer *self, Py_UCS4 maxchar) { self->capacity = INITIAL_CAPACITY; self->length = 0; self->object = PyUnicode_New(self->capacity, maxchar); - if (!self->object) + if (!self->object) { return -1; + } self->kind = PyUnicode_KIND(self->object); self->data = PyUnicode_DATA(self->object); @@ -46,7 +48,8 @@ static int internal_alloc(Textbuffer* self, Py_UCS4 maxchar) /* Internal deallocation function for textbuffers. */ -static void internal_dealloc(Textbuffer* self) +static void +internal_dealloc(Textbuffer *self) { Py_DECREF(self->object); } @@ -54,14 +57,16 @@ static void internal_dealloc(Textbuffer* self) /* Internal resize function. 
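
    Growth is geometric: Textbuffer_write() below calls this with
    capacity * RESIZE_FACTOR whenever the buffer fills, so appending n
    codepoints costs O(n) amortized. For example, with INITIAL_CAPACITY
    of 32, writing the 33rd codepoint triggers internal_resize(self, 64).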
*/ -static int internal_resize(Textbuffer* self, Py_ssize_t new_cap) +static int +internal_resize(Textbuffer *self, Py_ssize_t new_cap) { PyObject *newobj; void *newdata; newobj = PyUnicode_New(new_cap, PyUnicode_MAX_CHAR_VALUE(self->object)); - if (!newobj) + if (!newobj) { return -1; + } newdata = PyUnicode_DATA(newobj); memcpy(newdata, self->data, self->length * self->kind); Py_DECREF(self->object); @@ -75,22 +80,25 @@ static int internal_resize(Textbuffer* self, Py_ssize_t new_cap) /* Create a new textbuffer object. */ -Textbuffer* Textbuffer_new(TokenizerInput* text) +Textbuffer * +Textbuffer_new(TokenizerInput *text) { - Textbuffer* self = malloc(sizeof(Textbuffer)); + Textbuffer *self = malloc(sizeof(Textbuffer)); Py_UCS4 maxchar = 0; maxchar = PyUnicode_MAX_CHAR_VALUE(text->object); - if (!self) + if (!self) { goto fail_nomem; - if (internal_alloc(self, maxchar) < 0) + } + if (internal_alloc(self, maxchar) < 0) { goto fail_dealloc; + } return self; - fail_dealloc: +fail_dealloc: free(self); - fail_nomem: +fail_nomem: PyErr_NoMemory(); return NULL; } @@ -98,7 +106,8 @@ Textbuffer* Textbuffer_new(TokenizerInput* text) /* Deallocate the given textbuffer. */ -void Textbuffer_dealloc(Textbuffer* self) +void +Textbuffer_dealloc(Textbuffer *self) { internal_dealloc(self); free(self); @@ -107,26 +116,30 @@ void Textbuffer_dealloc(Textbuffer* self) /* Reset a textbuffer to its initial, empty state. */ -int Textbuffer_reset(Textbuffer* self) +int +Textbuffer_reset(Textbuffer *self) { Py_UCS4 maxchar = 0; maxchar = PyUnicode_MAX_CHAR_VALUE(self->object); internal_dealloc(self); - if (internal_alloc(self, maxchar)) + if (internal_alloc(self, maxchar)) { return -1; + } return 0; } /* Write a Unicode codepoint to the given textbuffer. */ -int Textbuffer_write(Textbuffer* self, Py_UCS4 code) +int +Textbuffer_write(Textbuffer *self, Py_UCS4 code) { if (self->length >= self->capacity) { - if (internal_resize(self, self->capacity * RESIZE_FACTOR) < 0) + if (internal_resize(self, self->capacity * RESIZE_FACTOR) < 0) { return -1; + } } PyUnicode_WRITE(self->kind, self->data, self->length++, code); @@ -139,7 +152,8 @@ int Textbuffer_write(Textbuffer* self, Py_UCS4 code) This function does not check for bounds. */ -Py_UCS4 Textbuffer_read(Textbuffer* self, Py_ssize_t index) +Py_UCS4 +Textbuffer_read(Textbuffer *self, Py_ssize_t index) { return PyUnicode_READ(self->kind, self->data, index); } @@ -147,7 +161,8 @@ Py_UCS4 Textbuffer_read(Textbuffer* self, Py_ssize_t index) /* Return the contents of the textbuffer as a Python Unicode object. */ -PyObject* Textbuffer_render(Textbuffer* self) +PyObject * +Textbuffer_render(Textbuffer *self) { return PyUnicode_FromKindAndData(self->kind, self->data, self->length); } @@ -155,17 +170,20 @@ PyObject* Textbuffer_render(Textbuffer* self) /* Concatenate the 'other' textbuffer onto the end of the given textbuffer. 
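
    Both buffers must have the same unicode kind (asserted in the body
    below); in this codebase that holds because every Textbuffer is
    created from the same TokenizerInput. A sketch of a call, with
    illustrative names for the two buffers:

        if (Textbuffer_concat(dest, src) < 0)
            return -1;  /* resize failed */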
*/ -int Textbuffer_concat(Textbuffer* self, Textbuffer* other) +int +Textbuffer_concat(Textbuffer *self, Textbuffer *other) { Py_ssize_t newlen = self->length + other->length; if (newlen > self->capacity) { - if (internal_resize(self, newlen + CONCAT_EXTRA) < 0) + if (internal_resize(self, newlen + CONCAT_EXTRA) < 0) { return -1; + } } assert(self->kind == other->kind); - memcpy(((Py_UCS1*) self->data) + self->kind * self->length, other->data, + memcpy(((Py_UCS1 *) self->data) + self->kind * self->length, + other->data, other->length * other->kind); self->length = newlen; @@ -175,15 +193,16 @@ int Textbuffer_concat(Textbuffer* self, Textbuffer* other) /* Reverse the contents of the given textbuffer. */ -void Textbuffer_reverse(Textbuffer* self) +void +Textbuffer_reverse(Textbuffer *self) { Py_ssize_t i, end = self->length - 1; Py_UCS4 tmp; for (i = 0; i < self->length / 2; i++) { tmp = PyUnicode_READ(self->kind, self->data, i); - PyUnicode_WRITE(self->kind, self->data, i, - PyUnicode_READ(self->kind, self->data, end - i)); + PyUnicode_WRITE( + self->kind, self->data, i, PyUnicode_READ(self->kind, self->data, end - i)); PyUnicode_WRITE(self->kind, self->data, end - i, tmp); } } diff --git a/src/mwparserfromhell/parser/ctokenizer/textbuffer.h b/src/mwparserfromhell/parser/ctokenizer/textbuffer.h index 85b39bc..e7db0c1 100644 --- a/src/mwparserfromhell/parser/ctokenizer/textbuffer.h +++ b/src/mwparserfromhell/parser/ctokenizer/textbuffer.h @@ -26,11 +26,11 @@ SOFTWARE. /* Functions */ -Textbuffer* Textbuffer_new(TokenizerInput*); -void Textbuffer_dealloc(Textbuffer*); -int Textbuffer_reset(Textbuffer*); -int Textbuffer_write(Textbuffer*, Py_UCS4); -Py_UCS4 Textbuffer_read(Textbuffer*, Py_ssize_t); -PyObject* Textbuffer_render(Textbuffer*); -int Textbuffer_concat(Textbuffer*, Textbuffer*); -void Textbuffer_reverse(Textbuffer*); +Textbuffer *Textbuffer_new(TokenizerInput *); +void Textbuffer_dealloc(Textbuffer *); +int Textbuffer_reset(Textbuffer *); +int Textbuffer_write(Textbuffer *, Py_UCS4); +Py_UCS4 Textbuffer_read(Textbuffer *, Py_ssize_t); +PyObject *Textbuffer_render(Textbuffer *); +int Textbuffer_concat(Textbuffer *, Textbuffer *); +void Textbuffer_reverse(Textbuffer *); diff --git a/src/mwparserfromhell/parser/ctokenizer/tok_parse.c b/src/mwparserfromhell/parser/ctokenizer/tok_parse.c index 740e9bf..f1d036f 100644 --- a/src/mwparserfromhell/parser/ctokenizer/tok_parse.c +++ b/src/mwparserfromhell/parser/ctokenizer/tok_parse.c @@ -27,27 +27,26 @@ SOFTWARE. 
#include "tok_support.h" #include "tokens.h" -#define DIGITS "0123456789" -#define HEXDIGITS "0123456789abcdefABCDEF" -#define ALPHANUM "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" -#define URISCHEME "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-" +#define DIGITS "0123456789" +#define HEXDIGITS "0123456789abcdefABCDEF" +#define ALPHANUM "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" +#define URISCHEME "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-" -#define MAX_BRACES 255 +#define MAX_BRACES 255 #define MAX_ENTITY_SIZE 8 typedef struct { - PyObject* title; + PyObject *title; int level; } HeadingData; /* Forward declarations */ -static PyObject* Tokenizer_really_parse_external_link( - Tokenizer*, int, Textbuffer*); -static int Tokenizer_parse_entity(Tokenizer*); -static int Tokenizer_parse_comment(Tokenizer*); -static int Tokenizer_handle_dl_term(Tokenizer*); -static int Tokenizer_parse_tag(Tokenizer*); +static PyObject *Tokenizer_really_parse_external_link(Tokenizer *, int, Textbuffer *); +static int Tokenizer_parse_entity(Tokenizer *); +static int Tokenizer_parse_comment(Tokenizer *); +static int Tokenizer_handle_dl_term(Tokenizer *); +static int Tokenizer_parse_tag(Tokenizer *); /* Determine whether the given code point is a marker. @@ -57,8 +56,9 @@ static int is_marker(Py_UCS4 this) int i; for (i = 0; i < NUM_MARKERS; i++) { - if (MARKERS[i] == this) + if (MARKERS[i] == this) { return 1; + } } return 0; } @@ -66,34 +66,39 @@ static int is_marker(Py_UCS4 this) /* Given a context, return the heading level encoded within it. */ -static int heading_level_from_context(uint64_t n) +static int +heading_level_from_context(uint64_t n) { int level; n /= LC_HEADING_LEVEL_1; - for (level = 1; n > 1; n >>= 1) + for (level = 1; n > 1; n >>= 1) { level++; + } return level; } /* Sanitize the name of a tag so it can be compared with others for equality. */ -static PyObject* strip_tag_name(PyObject* token, int take_attr) +static PyObject * +strip_tag_name(PyObject *token, int take_attr) { PyObject *text, *rstripped, *lowered; if (take_attr) { text = PyObject_GetAttrString(token, "text"); - if (!text) + if (!text) { return NULL; + } rstripped = PyObject_CallMethod(text, "rstrip", NULL); Py_DECREF(text); - } - else + } else { rstripped = PyObject_CallMethod(token, "rstrip", NULL); - if (!rstripped) + } + if (!rstripped) { return NULL; + } lowered = PyObject_CallMethod(rstripped, "lower", NULL); Py_DECREF(rstripped); return lowered; @@ -102,22 +107,25 @@ static PyObject* strip_tag_name(PyObject* token, int take_attr) /* Parse a template at the head of the wikicode string. */ -static int Tokenizer_parse_template(Tokenizer* self, int has_content) +static int +Tokenizer_parse_template(Tokenizer *self, int has_content) { PyObject *template; Py_ssize_t reset = self->head; uint64_t context = LC_TEMPLATE_NAME; - if (has_content) + if (has_content) { context |= LC_HAS_TEMPLATE; + } template = Tokenizer_parse(self, context, 1); if (BAD_ROUTE) { self->head = reset; return 0; } - if (!template) + if (!template) { return -1; + } if (Tokenizer_emit_first(self, TemplateOpen)) { Py_DECREF(template); return -1; @@ -127,15 +135,17 @@ static int Tokenizer_parse_template(Tokenizer* self, int has_content) return -1; } Py_DECREF(template); - if (Tokenizer_emit(self, TemplateClose)) + if (Tokenizer_emit(self, TemplateClose)) { return -1; + } return 0; } /* Parse an argument at the head of the wikicode string. 
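
    For example, "{{{foo|bar}}}" produces (roughly; whitespace and pad
    details elided) the token stream:

        ArgumentOpen, Text("foo"), ArgumentSeparator, Text("bar"),
        ArgumentClose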
*/ -static int Tokenizer_parse_argument(Tokenizer* self) +static int +Tokenizer_parse_argument(Tokenizer *self) { PyObject *argument; Py_ssize_t reset = self->head; @@ -145,8 +155,9 @@ static int Tokenizer_parse_argument(Tokenizer* self) self->head = reset; return 0; } - if (!argument) + if (!argument) { return -1; + } if (Tokenizer_emit_first(self, ArgumentOpen)) { Py_DECREF(argument); return -1; @@ -156,15 +167,17 @@ static int Tokenizer_parse_argument(Tokenizer* self) return -1; } Py_DECREF(argument); - if (Tokenizer_emit(self, ArgumentClose)) + if (Tokenizer_emit(self, ArgumentClose)) { return -1; + } return 0; } /* Parse a template or argument at the head of the wikicode string. */ -static int Tokenizer_parse_template_or_argument(Tokenizer* self) +static int +Tokenizer_parse_template_or_argument(Tokenizer *self) { unsigned int braces = 2, i; int has_content = 0; @@ -175,67 +188,79 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self) self->head++; braces++; } - if (Tokenizer_push(self, 0)) + if (Tokenizer_push(self, 0)) { return -1; + } while (braces) { if (braces == 1) { - if (Tokenizer_emit_text_then_stack(self, "{")) + if (Tokenizer_emit_text_then_stack(self, "{")) { return -1; + } return 0; } if (braces == 2) { - if (Tokenizer_parse_template(self, has_content)) + if (Tokenizer_parse_template(self, has_content)) { return -1; + } if (BAD_ROUTE) { RESET_ROUTE(); - if (Tokenizer_emit_text_then_stack(self, "{{")) + if (Tokenizer_emit_text_then_stack(self, "{{")) { return -1; + } return 0; } break; } - if (Tokenizer_parse_argument(self)) + if (Tokenizer_parse_argument(self)) { return -1; + } if (BAD_ROUTE) { RESET_ROUTE(); - if (Tokenizer_parse_template(self, has_content)) + if (Tokenizer_parse_template(self, has_content)) { return -1; + } if (BAD_ROUTE) { char text[MAX_BRACES + 1]; RESET_ROUTE(); - for (i = 0; i < braces; i++) text[i] = '{'; + for (i = 0; i < braces; i++) { + text[i] = '{'; + } text[braces] = '\0'; - if (Tokenizer_emit_text_then_stack(self, text)) + if (Tokenizer_emit_text_then_stack(self, text)) { return -1; + } return 0; - } - else + } else { braces -= 2; - } - else + } + } else { braces -= 3; + } if (braces) { has_content = 1; self->head++; } } tokenlist = Tokenizer_pop(self); - if (!tokenlist) + if (!tokenlist) { return -1; + } if (Tokenizer_emit_all(self, tokenlist)) { Py_DECREF(tokenlist); return -1; } Py_DECREF(tokenlist); - if (self->topstack->context & LC_FAIL_NEXT) + if (self->topstack->context & LC_FAIL_NEXT) { self->topstack->context ^= LC_FAIL_NEXT; + } return 0; } /* Handle a template parameter at the head of the string. 
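
    This runs when a "|" is read inside a template. For example,
    "{{foo|a=b}}" produces (roughly) the token stream:

        TemplateOpen, Text("foo"), TemplateParamSeparator, Text("a"),
        TemplateParamEquals, Text("b"), TemplateClose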
*/ -static int Tokenizer_handle_template_param(Tokenizer* self) +static int +Tokenizer_handle_template_param(Tokenizer *self) { PyObject *stack; @@ -245,38 +270,43 @@ static int Tokenizer_handle_template_param(Tokenizer* self) return -1; } self->topstack->context ^= LC_TEMPLATE_NAME; - } - else if (self->topstack->context & LC_TEMPLATE_PARAM_VALUE) + } else if (self->topstack->context & LC_TEMPLATE_PARAM_VALUE) { self->topstack->context ^= LC_TEMPLATE_PARAM_VALUE; + } if (self->topstack->context & LC_TEMPLATE_PARAM_KEY) { stack = Tokenizer_pop(self); - if (!stack) + if (!stack) { return -1; + } if (Tokenizer_emit_all(self, stack)) { Py_DECREF(stack); return -1; } Py_DECREF(stack); - } - else + } else { self->topstack->context |= LC_TEMPLATE_PARAM_KEY; - if (Tokenizer_emit(self, TemplateParamSeparator)) + } + if (Tokenizer_emit(self, TemplateParamSeparator)) { return -1; - if (Tokenizer_push(self, self->topstack->context)) + } + if (Tokenizer_push(self, self->topstack->context)) { return -1; + } return 0; } /* Handle a template parameter's value at the head of the string. */ -static int Tokenizer_handle_template_param_value(Tokenizer* self) +static int +Tokenizer_handle_template_param_value(Tokenizer *self) { PyObject *stack; stack = Tokenizer_pop(self); - if (!stack) + if (!stack) { return -1; + } if (Tokenizer_emit_all(self, stack)) { Py_DECREF(stack); return -1; @@ -284,26 +314,29 @@ static int Tokenizer_handle_template_param_value(Tokenizer* self) Py_DECREF(stack); self->topstack->context ^= LC_TEMPLATE_PARAM_KEY; self->topstack->context |= LC_TEMPLATE_PARAM_VALUE; - if (Tokenizer_emit(self, TemplateParamEquals)) + if (Tokenizer_emit(self, TemplateParamEquals)) { return -1; + } return 0; } /* Handle the end of a template at the head of the string. */ -static PyObject* Tokenizer_handle_template_end(Tokenizer* self) +static PyObject * +Tokenizer_handle_template_end(Tokenizer *self) { - PyObject* stack; + PyObject *stack; if (self->topstack->context & LC_TEMPLATE_NAME) { - if (!(self->topstack->context & (LC_HAS_TEXT | LC_HAS_TEMPLATE))) + if (!(self->topstack->context & (LC_HAS_TEXT | LC_HAS_TEMPLATE))) { return Tokenizer_fail_route(self); - } - else if (self->topstack->context & LC_TEMPLATE_PARAM_KEY) { + } + } else if (self->topstack->context & LC_TEMPLATE_PARAM_KEY) { stack = Tokenizer_pop(self); - if (!stack) + if (!stack) { return NULL; + } if (Tokenizer_emit_all(self, stack)) { Py_DECREF(stack); return NULL; @@ -318,21 +351,24 @@ static PyObject* Tokenizer_handle_template_end(Tokenizer* self) /* Handle the separator between an argument's name and default. */ -static int Tokenizer_handle_argument_separator(Tokenizer* self) +static int +Tokenizer_handle_argument_separator(Tokenizer *self) { self->topstack->context ^= LC_ARGUMENT_NAME; self->topstack->context |= LC_ARGUMENT_DEFAULT; - if (Tokenizer_emit(self, ArgumentSeparator)) + if (Tokenizer_emit(self, ArgumentSeparator)) { return -1; + } return 0; } /* Handle the end of an argument at the head of the string. */ -static PyObject* Tokenizer_handle_argument_end(Tokenizer* self) +static PyObject * +Tokenizer_handle_argument_end(Tokenizer *self) { - PyObject* stack = Tokenizer_pop(self); + PyObject *stack = Tokenizer_pop(self); self->head += 2; return stack; @@ -341,7 +377,8 @@ static PyObject* Tokenizer_handle_argument_end(Tokenizer* self) /* Parse an internal wikilink at the head of the wikicode string. 
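
    For example, "[[Page|label]]" produces (roughly) the token stream:

        WikilinkOpen, Text("Page"), WikilinkSeparator, Text("label"),
        WikilinkClose

    while "[[Page]]" omits the separator and label.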
*/ -static int Tokenizer_parse_wikilink(Tokenizer* self) +static int +Tokenizer_parse_wikilink(Tokenizer *self) { Py_ssize_t reset; PyObject *extlink, *wikilink, *kwargs; @@ -358,12 +395,14 @@ static int Tokenizer_parse_wikilink(Tokenizer* self) if (BAD_ROUTE) { RESET_ROUTE(); self->head = reset; - if (Tokenizer_emit_text(self, "[[")) + if (Tokenizer_emit_text(self, "[[")) { return -1; + } return 0; } - if (!wikilink) + if (!wikilink) { return -1; + } if (Tokenizer_emit(self, WikilinkOpen)) { Py_DECREF(wikilink); return -1; @@ -373,19 +412,22 @@ static int Tokenizer_parse_wikilink(Tokenizer* self) return -1; } Py_DECREF(wikilink); - if (Tokenizer_emit(self, WikilinkClose)) + if (Tokenizer_emit(self, WikilinkClose)) { return -1; + } return 0; } - if (!extlink) + if (!extlink) { return -1; + } if (self->topstack->context & LC_EXT_LINK_TITLE) { // In this exceptional case, an external link that looks like a // wikilink inside of an external link is parsed as text: Py_DECREF(extlink); self->head = reset; - if (Tokenizer_emit_text(self, "[[")) + if (Tokenizer_emit_text(self, "[[")) { return -1; + } return 0; } if (Tokenizer_emit_text(self, "[")) { @@ -407,29 +449,33 @@ static int Tokenizer_parse_wikilink(Tokenizer* self) return -1; } Py_DECREF(extlink); - if (Tokenizer_emit(self, ExternalLinkClose)) + if (Tokenizer_emit(self, ExternalLinkClose)) { return -1; + } return 0; } /* Handle the separator between a wikilink's title and its text. */ -static int Tokenizer_handle_wikilink_separator(Tokenizer* self) +static int +Tokenizer_handle_wikilink_separator(Tokenizer *self) { self->topstack->context ^= LC_WIKILINK_TITLE; self->topstack->context |= LC_WIKILINK_TEXT; - if (Tokenizer_emit(self, WikilinkSeparator)) + if (Tokenizer_emit(self, WikilinkSeparator)) { return -1; + } return 0; } /* Handle the end of a wikilink at the head of the string. */ -static PyObject* Tokenizer_handle_wikilink_end(Tokenizer* self) +static PyObject * +Tokenizer_handle_wikilink_end(Tokenizer *self) { - PyObject* stack = Tokenizer_pop(self); + PyObject *stack = Tokenizer_pop(self); self->head += 1; return stack; } @@ -437,34 +483,40 @@ static PyObject* Tokenizer_handle_wikilink_end(Tokenizer* self) /* Parse the URI scheme of a bracket-enclosed external link. 
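
    Scheme validation decides whether the bracketed form parses at all. A
    sketch of the expected behavior, assuming "https" is on the recognized
    scheme list and "foo" is not (both URLs are placeholders):

        import mwparserfromhell

        ok = mwparserfromhell.parse("[https://example.com/ Example]")
        print(ok.filter_external_links())   # expected: ['[https://example.com/ Example]']

        bad = mwparserfromhell.parse("[foo://example.com/ Example]")
        print(bad.filter_external_links())  # expected: [] (left as plain text)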
*/ -static int Tokenizer_parse_bracketed_uri_scheme(Tokenizer* self) +static int +Tokenizer_parse_bracketed_uri_scheme(Tokenizer *self) { - static const char* valid = URISCHEME; - Textbuffer* buffer; - PyObject* scheme; + static const char *valid = URISCHEME; + Textbuffer *buffer; + PyObject *scheme; Py_UCS4 this; int slashes, i; - if (Tokenizer_check_route(self, LC_EXT_LINK_URI) < 0) + if (Tokenizer_check_route(self, LC_EXT_LINK_URI) < 0) { return 0; - if (Tokenizer_push(self, LC_EXT_LINK_URI)) + } + if (Tokenizer_push(self, LC_EXT_LINK_URI)) { return -1; + } if (Tokenizer_read(self, 0) == '/' && Tokenizer_read(self, 1) == '/') { - if (Tokenizer_emit_text(self, "//")) + if (Tokenizer_emit_text(self, "//")) { return -1; + } self->head += 2; - } - else { + } else { buffer = Textbuffer_new(&self->text); - if (!buffer) + if (!buffer) { return -1; + } while ((this = Tokenizer_read(self, 0))) { i = 0; while (1) { - if (!valid[i]) + if (!valid[i]) { goto end_of_loop; - if (this == (Py_UCS4) valid[i]) + } + if (this == (Py_UCS4) valid[i]) { break; + } i++; } Textbuffer_write(buffer, this); @@ -474,7 +526,7 @@ static int Tokenizer_parse_bracketed_uri_scheme(Tokenizer* self) } self->head++; } - end_of_loop: + end_of_loop: if (this != ':') { Textbuffer_dealloc(buffer); Tokenizer_fail_route(self); @@ -485,8 +537,7 @@ static int Tokenizer_parse_bracketed_uri_scheme(Tokenizer* self) return -1; } self->head++; - slashes = (Tokenizer_read(self, 0) == '/' && - Tokenizer_read(self, 1) == '/'); + slashes = (Tokenizer_read(self, 0) == '/' && Tokenizer_read(self, 1) == '/'); if (slashes) { if (Tokenizer_emit_text(self, "//")) { Textbuffer_dealloc(buffer); @@ -496,8 +547,9 @@ static int Tokenizer_parse_bracketed_uri_scheme(Tokenizer* self) } scheme = Textbuffer_render(buffer); Textbuffer_dealloc(buffer); - if (!scheme) + if (!scheme) { return -1; + } if (!is_scheme(scheme, slashes)) { Py_DECREF(scheme); Tokenizer_fail_route(self); @@ -511,9 +563,10 @@ static int Tokenizer_parse_bracketed_uri_scheme(Tokenizer* self) /* Parse the URI scheme of a free (no brackets) external link. 
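
    Because the candidate scheme has already been emitted as text, the
    tokenizer backtracks through the textbuffer and stops at the first
    non-word character. A rough illustration (made-up input; the output shown
    is the expected result, not a captured run):

        import mwparserfromhell

        # "xhttps" is not a recognized scheme, so no free link is found:
        print(mwparserfromhell.parse("xhttps://example.com").filter_external_links())
        # expected: []

        # After a word boundary, the scheme is picked up:
        print(mwparserfromhell.parse("see https://example.com").filter_external_links())
        # expected: ['https://example.com']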
*/ -static int Tokenizer_parse_free_uri_scheme(Tokenizer* self) +static int +Tokenizer_parse_free_uri_scheme(Tokenizer *self) { - static const char* valid = URISCHEME; + static const char *valid = URISCHEME; Textbuffer *scheme_buffer = Textbuffer_new(&self->text); PyObject *scheme; Py_UCS4 ch; @@ -521,15 +574,17 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self) int slashes, j; uint64_t new_context; - if (!scheme_buffer) + if (!scheme_buffer) { return -1; + } // We have to backtrack through the textbuffer looking for our scheme since // it was just parsed as text: for (i = self->topstack->textbuffer->length - 1; i >= 0; i--) { ch = Textbuffer_read(self->topstack->textbuffer, i); // Stop at the first non-word character (equivalent to \W in regex) - if (!Py_UNICODE_ISALNUM(ch) && ch != '_') + if (!Py_UNICODE_ISALNUM(ch) && ch != '_') { break; + } j = 0; do { if (!valid[j]) { @@ -546,8 +601,7 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self) Textbuffer_dealloc(scheme_buffer); return -1; } - slashes = (Tokenizer_read(self, 0) == '/' && - Tokenizer_read(self, 1) == '/'); + slashes = (Tokenizer_read(self, 0) == '/' && Tokenizer_read(self, 1) == '/'); if (!is_scheme(scheme, slashes)) { Py_DECREF(scheme); Textbuffer_dealloc(scheme_buffer); @@ -564,13 +618,16 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self) Textbuffer_dealloc(scheme_buffer); return -1; } - if (Tokenizer_emit_textbuffer(self, scheme_buffer)) + if (Tokenizer_emit_textbuffer(self, scheme_buffer)) { return -1; - if (Tokenizer_emit_char(self, ':')) + } + if (Tokenizer_emit_char(self, ':')) { return -1; + } if (slashes) { - if (Tokenizer_emit_text(self, "//")) + if (Tokenizer_emit_text(self, "//")) { return -1; + } self->head += 2; } return 0; @@ -579,27 +636,34 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self) /* Handle text in a free external link, including trailing punctuation. */ -static int Tokenizer_handle_free_link_text( - Tokenizer* self, int* parens, Textbuffer* tail, Py_UCS4 this) +static int +Tokenizer_handle_free_link_text(Tokenizer *self, + int *parens, + Textbuffer *tail, + Py_UCS4 this) { - #define PUSH_TAIL_BUFFER(tail, error) \ - if (tail && tail->length > 0) { \ - if (Textbuffer_concat(self->topstack->textbuffer, tail)) \ - return error; \ - if (Textbuffer_reset(tail)) \ - return error; \ - } +#define PUSH_TAIL_BUFFER(tail, error) \ + do { \ + if (tail && tail->length > 0) { \ + if (Textbuffer_concat(self->topstack->textbuffer, tail)) { \ + return error; \ + } \ + if (Textbuffer_reset(tail)) { \ + return error; \ + } \ + } \ + } while (0) if (this == '(' && !(*parens)) { *parens = 1; - PUSH_TAIL_BUFFER(tail, -1) - } - else if (this == ',' || this == ';' || this == '\\' || this == '.' || - this == ':' || this == '!' || this == '?' || - (!(*parens) && this == ')')) + PUSH_TAIL_BUFFER(tail, -1); + } else if (this == ',' || this == ';' || this == '\\' || this == '.' || + this == ':' || this == '!' || this == '?' || + (!(*parens) && this == ')')) { return Textbuffer_write(tail, this); - else - PUSH_TAIL_BUFFER(tail, -1) + } else { + PUSH_TAIL_BUFFER(tail, -1); + } return Tokenizer_emit_char(self, this); } @@ -607,99 +671,106 @@ static int Tokenizer_handle_free_link_text( Return whether the current head is the end of a URI. 
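
    The tail buffer above is what keeps trailing punctuation out of free
    links; a sketch with illustrative input:

        import mwparserfromhell

        code = mwparserfromhell.parse("See https://example.com/foo, or not.")
        print(code.filter_external_links()[0].url)
        # expected: https://example.com/foo  (the comma stays outside the link)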
*/ static int -Tokenizer_is_uri_end(Tokenizer* self, Py_UCS4 this, Py_UCS4 next) +Tokenizer_is_uri_end(Tokenizer *self, Py_UCS4 this, Py_UCS4 next) { // Built from Tokenizer_parse()'s end sentinels: Py_UCS4 after = Tokenizer_read(self, 2); uint64_t ctx = self->topstack->context; - return (!this || this == '\n' || this == '[' || this == ']' || - this == '<' || this == '>' || this == '"' || this == ' ' || - (this == '\'' && next == '\'') || - (this == '|' && ctx & LC_TEMPLATE) || - (this == '=' && ctx & (LC_TEMPLATE_PARAM_KEY | LC_HEADING)) || - (this == '}' && next == '}' && - (ctx & LC_TEMPLATE || (after == '}' && ctx & LC_ARGUMENT)))); + return (!this || this == '\n' || this == '[' || this == ']' || this == '<' || + this == '>' || this == '"' || this == ' ' || + (this == '\'' && next == '\'') || (this == '|' && ctx & LC_TEMPLATE) || + (this == '=' && ctx & (LC_TEMPLATE_PARAM_KEY | LC_HEADING)) || + (this == '}' && next == '}' && + (ctx & LC_TEMPLATE || (after == '}' && ctx & LC_ARGUMENT)))); } /* Really parse an external link. */ -static PyObject* -Tokenizer_really_parse_external_link(Tokenizer* self, int brackets, - Textbuffer* extra) +static PyObject * +Tokenizer_really_parse_external_link(Tokenizer *self, int brackets, Textbuffer *extra) { Py_UCS4 this, next; int parens = 0; - if (brackets ? Tokenizer_parse_bracketed_uri_scheme(self) : - Tokenizer_parse_free_uri_scheme(self)) + if (brackets ? Tokenizer_parse_bracketed_uri_scheme(self) + : Tokenizer_parse_free_uri_scheme(self)) { return NULL; - if (BAD_ROUTE) + } + if (BAD_ROUTE) { return NULL; + } this = Tokenizer_read(self, 0); - if (!this || this == '\n' || this == ' ' || this == ']') + if (!this || this == '\n' || this == ' ' || this == ']') { return Tokenizer_fail_route(self); - if (!brackets && this == '[') + } + if (!brackets && this == '[') { return Tokenizer_fail_route(self); + } while (1) { this = Tokenizer_read(self, 0); next = Tokenizer_read(self, 1); if (this == '&') { - PUSH_TAIL_BUFFER(extra, NULL) - if (Tokenizer_parse_entity(self)) + PUSH_TAIL_BUFFER(extra, NULL); + if (Tokenizer_parse_entity(self)) { return NULL; - } - else if (this == '<' && next == '!' - && Tokenizer_read(self, 2) == '-' - && Tokenizer_read(self, 3) == '-') { - PUSH_TAIL_BUFFER(extra, NULL) - if (Tokenizer_parse_comment(self)) + } + } else if (this == '<' && next == '!' 
&& Tokenizer_read(self, 2) == '-' && + Tokenizer_read(self, 3) == '-') { + PUSH_TAIL_BUFFER(extra, NULL); + if (Tokenizer_parse_comment(self)) { return NULL; - } - else if (this == '{' && next == '{' && Tokenizer_CAN_RECURSE(self)) { - PUSH_TAIL_BUFFER(extra, NULL) - if (Tokenizer_parse_template_or_argument(self)) + } + } else if (this == '{' && next == '{' && Tokenizer_CAN_RECURSE(self)) { + PUSH_TAIL_BUFFER(extra, NULL); + if (Tokenizer_parse_template_or_argument(self)) { return NULL; - } - else if (brackets) { - if (!this || this == '\n') + } + } else if (brackets) { + if (!this || this == '\n') { return Tokenizer_fail_route(self); - if (this == ']') + } + if (this == ']') { return Tokenizer_pop(self); + } if (Tokenizer_is_uri_end(self, this, next)) { if (this == ' ') { - if (Tokenizer_emit(self, ExternalLinkSeparator)) + if (Tokenizer_emit(self, ExternalLinkSeparator)) { return NULL; + } self->head++; - } - else { - PyObject* kwargs = PyDict_New(); - if (!kwargs) + } else { + PyObject *kwargs = PyDict_New(); + if (!kwargs) { return NULL; + } PyDict_SetItemString(kwargs, "suppress_space", Py_True); - if (Tokenizer_emit_kwargs(self, ExternalLinkSeparator, kwargs)) + if (Tokenizer_emit_kwargs(self, ExternalLinkSeparator, kwargs)) { return NULL; + } } self->topstack->context ^= LC_EXT_LINK_URI; self->topstack->context |= LC_EXT_LINK_TITLE; return Tokenizer_parse(self, 0, 0); } - if (Tokenizer_emit_char(self, this)) + if (Tokenizer_emit_char(self, this)) { return NULL; - } - else { + } + } else { if (Tokenizer_is_uri_end(self, this, next)) { if (this == ' ') { - if (Textbuffer_write(extra, this)) + if (Textbuffer_write(extra, this)) { return NULL; - } - else + } + } else { self->head--; + } return Tokenizer_pop(self); } - if (Tokenizer_handle_free_link_text(self, &parens, extra, this)) + if (Tokenizer_handle_free_link_text(self, &parens, extra, this)) { return NULL; + } } self->head++; } @@ -709,18 +780,20 @@ Tokenizer_really_parse_external_link(Tokenizer* self, int brackets, Remove the URI scheme of a new external link from the textbuffer. */ static int -Tokenizer_remove_uri_scheme_from_textbuffer(Tokenizer* self, PyObject* link) +Tokenizer_remove_uri_scheme_from_textbuffer(Tokenizer *self, PyObject *link) { - PyObject *text = PyObject_GetAttrString(PyList_GET_ITEM(link, 0), "text"), - *split, *scheme; + PyObject *text = PyObject_GetAttrString(PyList_GET_ITEM(link, 0), "text"), *split, + *scheme; Py_ssize_t length; - if (!text) + if (!text) { return -1; + } split = PyObject_CallMethod(text, "split", "si", ":", 1); Py_DECREF(text); - if (!split) + if (!split) { return -1; + } scheme = PyList_GET_ITEM(split, 0); length = PyUnicode_GET_LENGTH(scheme); Py_DECREF(split); @@ -731,24 +804,28 @@ Tokenizer_remove_uri_scheme_from_textbuffer(Tokenizer* self, PyObject* link) /* Parse an external link at the head of the wikicode string. 
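
    Both forms produce ExternalLink nodes, distinguished by the brackets
    flag; a short sketch (the URLs are placeholders):

        import mwparserfromhell

        code = mwparserfromhell.parse("[https://example.com/ Example] and https://example.org/")
        bracketed, free = code.filter_external_links()
        print(bracketed.brackets, bracketed.title)  # expected: True Example
        print(free.brackets, free.title)            # expected: False None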
*/ -static int Tokenizer_parse_external_link(Tokenizer* self, int brackets) +static int +Tokenizer_parse_external_link(Tokenizer *self, int brackets) { - #define NOT_A_LINK \ - if (!brackets && self->topstack->context & LC_DLTERM) \ - return Tokenizer_handle_dl_term(self); \ - return Tokenizer_emit_char(self, Tokenizer_read(self, 0)) +#define NOT_A_LINK \ + do { \ + if (!brackets && self->topstack->context & LC_DLTERM) { \ + return Tokenizer_handle_dl_term(self); \ + } \ + return Tokenizer_emit_char(self, Tokenizer_read(self, 0)); \ + } while (0) Py_ssize_t reset = self->head; PyObject *link, *kwargs; Textbuffer *extra; - if (self->topstack->context & AGG_NO_EXT_LINKS || - !(Tokenizer_CAN_RECURSE(self))) { + if (self->topstack->context & AGG_NO_EXT_LINKS || !(Tokenizer_CAN_RECURSE(self))) { NOT_A_LINK; } extra = Textbuffer_new(&self->text); - if (!extra) + if (!extra) { return -1; + } self->head++; link = Tokenizer_really_parse_external_link(self, brackets, extra); if (BAD_ROUTE) { @@ -790,8 +867,9 @@ static int Tokenizer_parse_external_link(Tokenizer* self, int brackets) Textbuffer_dealloc(extra); return -1; } - if (extra->length > 0) + if (extra->length > 0) { return Tokenizer_emit_textbuffer(self, extra); + } Textbuffer_dealloc(extra); return 0; } @@ -799,7 +877,8 @@ static int Tokenizer_parse_external_link(Tokenizer* self, int brackets) /* Parse a section heading at the head of the wikicode string. */ -static int Tokenizer_parse_heading(Tokenizer* self) +static int +Tokenizer_parse_heading(Tokenizer *self) { Py_ssize_t reset = self->head; int best = 1, i, context, diff; @@ -813,13 +892,14 @@ static int Tokenizer_parse_heading(Tokenizer* self) self->head++; } context = LC_HEADING_LEVEL_1 << (best > 5 ? 5 : best - 1); - heading = (HeadingData*) Tokenizer_parse(self, context, 1); + heading = (HeadingData *) Tokenizer_parse(self, context, 1); if (BAD_ROUTE) { RESET_ROUTE(); self->head = reset + best - 1; for (i = 0; i < best; i++) { - if (Tokenizer_emit_char(self, '=')) + if (Tokenizer_emit_char(self, '=')) { return -1; + } } self->global ^= GL_HEADING; return 0; @@ -864,8 +944,9 @@ static int Tokenizer_parse_heading(Tokenizer* self) } Py_DECREF(heading->title); free(heading); - if (Tokenizer_emit(self, HeadingEnd)) + if (Tokenizer_emit(self, HeadingEnd)) { return -1; + } self->global ^= GL_HEADING; return 0; } @@ -873,7 +954,8 @@ static int Tokenizer_parse_heading(Tokenizer* self) /* Handle the end of a section heading at the head of the string. */ -static HeadingData* Tokenizer_handle_heading_end(Tokenizer* self) +static HeadingData * +Tokenizer_handle_heading_end(Tokenizer *self) { Py_ssize_t reset = self->head; int best, i, current, level, diff; @@ -887,21 +969,20 @@ static HeadingData* Tokenizer_handle_heading_end(Tokenizer* self) self->head++; } current = heading_level_from_context(self->topstack->context); - level = current > best ? (best > 6 ? 6 : best) : - (current > 6 ? 6 : current); - after = (HeadingData*) Tokenizer_parse(self, self->topstack->context, 1); + level = current > best ? (best > 6 ? 6 : best) : (current > 6 ? 
6 : current); + after = (HeadingData *) Tokenizer_parse(self, self->topstack->context, 1); if (BAD_ROUTE) { RESET_ROUTE(); if (level < best) { diff = best - level; for (i = 0; i < diff; i++) { - if (Tokenizer_emit_char(self, '=')) + if (Tokenizer_emit_char(self, '=')) { return NULL; + } } } self->head = reset + best - 1; - } - else { + } else { if (!after) { return NULL; } @@ -922,8 +1003,9 @@ static HeadingData* Tokenizer_handle_heading_end(Tokenizer* self) free(after); } stack = Tokenizer_pop(self); - if (!stack) + if (!stack) { return NULL; + } heading = malloc(sizeof(HeadingData)); if (!heading) { PyErr_NoMemory(); @@ -937,21 +1019,24 @@ static HeadingData* Tokenizer_handle_heading_end(Tokenizer* self) /* Actually parse an HTML entity and ensure that it is valid. */ -static int Tokenizer_really_parse_entity(Tokenizer* self) +static int +Tokenizer_really_parse_entity(Tokenizer *self) { PyObject *kwargs, *charobj, *textobj; Py_UCS4 this; int numeric, hexadecimal, i, j, zeroes, test; char *valid, *text, *buffer, *def; - #define FAIL_ROUTE_AND_EXIT() { \ - Tokenizer_fail_route(self); \ - free(text); \ - return 0; \ - } +#define FAIL_ROUTE_AND_EXIT() \ + do { \ + Tokenizer_fail_route(self); \ + free(text); \ + return 0; \ + } while (0) - if (Tokenizer_emit(self, HTMLEntityStart)) + if (Tokenizer_emit(self, HTMLEntityStart)) { return -1; + } self->head++; this = Tokenizer_read(self, 0); if (!this) { @@ -960,8 +1045,9 @@ static int Tokenizer_really_parse_entity(Tokenizer* self) } if (this == '#') { numeric = 1; - if (Tokenizer_emit(self, HTMLEntityNumeric)) + if (Tokenizer_emit(self, HTMLEntityNumeric)) { return -1; + } self->head++; this = Tokenizer_read(self, 0); if (!this) { @@ -971,29 +1057,32 @@ static int Tokenizer_really_parse_entity(Tokenizer* self) if (this == 'x' || this == 'X') { hexadecimal = 1; kwargs = PyDict_New(); - if (!kwargs) + if (!kwargs) { return -1; + } if (!(charobj = PyUnicode_FROM_SINGLE(this))) { Py_DECREF(kwargs); return -1; } PyDict_SetItemString(kwargs, "char", charobj); Py_DECREF(charobj); - if (Tokenizer_emit_kwargs(self, HTMLEntityHex, kwargs)) + if (Tokenizer_emit_kwargs(self, HTMLEntityHex, kwargs)) { return -1; + } self->head++; - } - else + } else { hexadecimal = 0; - } - else + } + } else { numeric = hexadecimal = 0; - if (hexadecimal) + } + if (hexadecimal) { valid = HEXDIGITS; - else if (numeric) + } else if (numeric) { valid = DIGITS; - else + } else { valid = ALPHANUM; + } text = calloc(MAX_ENTITY_SIZE, sizeof(char)); if (!text) { PyErr_NoMemory(); @@ -1004,8 +1093,9 @@ static int Tokenizer_really_parse_entity(Tokenizer* self) while (1) { this = Tokenizer_read(self, 0); if (this == ';') { - if (i == 0) - FAIL_ROUTE_AND_EXIT() + if (i == 0) { + FAIL_ROUTE_AND_EXIT(); + } break; } if (i == 0 && this == '0') { @@ -1013,16 +1103,20 @@ static int Tokenizer_really_parse_entity(Tokenizer* self) self->head++; continue; } - if (i >= MAX_ENTITY_SIZE) - FAIL_ROUTE_AND_EXIT() - if (is_marker(this)) - FAIL_ROUTE_AND_EXIT() + if (i >= MAX_ENTITY_SIZE) { + FAIL_ROUTE_AND_EXIT(); + } + if (is_marker(this)) { + FAIL_ROUTE_AND_EXIT(); + } j = 0; while (1) { - if (!valid[j]) - FAIL_ROUTE_AND_EXIT() - if (this == (Py_UCS4) valid[j]) + if (!valid[j]) { + FAIL_ROUTE_AND_EXIT(); + } + if (this == (Py_UCS4) valid[j]) { break; + } j++; } text[i] = (char) this; @@ -1031,17 +1125,19 @@ static int Tokenizer_really_parse_entity(Tokenizer* self) } if (numeric) { sscanf(text, (hexadecimal ? 
"%x" : "%d"), &test); - if (test < 1 || test > 0x10FFFF) - FAIL_ROUTE_AND_EXIT() - } - else { + if (test < 1 || test > 0x10FFFF) { + FAIL_ROUTE_AND_EXIT(); + } + } else { i = 0; while (1) { def = entitydefs[i]; - if (!def) // We've reached the end of the defs without finding it - FAIL_ROUTE_AND_EXIT() - if (strcmp(text, def) == 0) + if (!def) { // We've reached the end of the defs without finding it + FAIL_ROUTE_AND_EXIT(); + } + if (strcmp(text, def) == 0) { break; + } i++; } } @@ -1052,8 +1148,9 @@ static int Tokenizer_really_parse_entity(Tokenizer* self) PyErr_NoMemory(); return -1; } - for (i = 0; i < zeroes; i++) + for (i = 0; i < zeroes; i++) { strcat(buffer, "0"); + } strcat(buffer, text); free(text); text = buffer; @@ -1071,38 +1168,46 @@ static int Tokenizer_really_parse_entity(Tokenizer* self) } PyDict_SetItemString(kwargs, "text", textobj); Py_DECREF(textobj); - if (Tokenizer_emit_kwargs(self, Text, kwargs)) + if (Tokenizer_emit_kwargs(self, Text, kwargs)) { return -1; - if (Tokenizer_emit(self, HTMLEntityEnd)) + } + if (Tokenizer_emit(self, HTMLEntityEnd)) { return -1; + } return 0; } /* Parse an HTML entity at the head of the wikicode string. */ -static int Tokenizer_parse_entity(Tokenizer* self) +static int +Tokenizer_parse_entity(Tokenizer *self) { Py_ssize_t reset = self->head; PyObject *tokenlist; - if (Tokenizer_check_route(self, LC_HTML_ENTITY) < 0) + if (Tokenizer_check_route(self, LC_HTML_ENTITY) < 0) { goto on_bad_route; - if (Tokenizer_push(self, LC_HTML_ENTITY)) + } + if (Tokenizer_push(self, LC_HTML_ENTITY)) { return -1; - if (Tokenizer_really_parse_entity(self)) + } + if (Tokenizer_really_parse_entity(self)) { return -1; + } if (BAD_ROUTE) { - on_bad_route: + on_bad_route: RESET_ROUTE(); self->head = reset; - if (Tokenizer_emit_char(self, '&')) + if (Tokenizer_emit_char(self, '&')) { return -1; + } return 0; } tokenlist = Tokenizer_pop(self); - if (!tokenlist) + if (!tokenlist) { return -1; + } if (Tokenizer_emit_all(self, tokenlist)) { Py_DECREF(tokenlist); return -1; @@ -1114,15 +1219,17 @@ static int Tokenizer_parse_entity(Tokenizer* self) /* Parse an HTML comment at the head of the wikicode string. 
*/ -static int Tokenizer_parse_comment(Tokenizer* self) +static int +Tokenizer_parse_comment(Tokenizer *self) { Py_ssize_t reset = self->head + 3; PyObject *comment; Py_UCS4 this; self->head += 4; - if (Tokenizer_push(self, 0)) + if (Tokenizer_push(self, 0)) { return -1; + } while (1) { this = Tokenizer_read(self, 0); if (!this) { @@ -1132,16 +1239,20 @@ static int Tokenizer_parse_comment(Tokenizer* self) return Tokenizer_emit_text(self, " +CommentStart = make("CommentStart") # -TagOpenOpen = make("TagOpenOpen") # < +TagOpenOpen = make("TagOpenOpen") # < TagAttrStart = make("TagAttrStart") -TagAttrEquals = make("TagAttrEquals") # = -TagAttrQuote = make("TagAttrQuote") # ", ' -TagCloseOpen = make("TagCloseOpen") # > -TagCloseSelfclose = make("TagCloseSelfclose") # /> -TagOpenClose = make("TagOpenClose") # +TagAttrEquals = make("TagAttrEquals") # = +TagAttrQuote = make("TagAttrQuote") # ", ' +TagCloseOpen = make("TagCloseOpen") # > +TagCloseSelfclose = make("TagCloseSelfclose") # /> +TagOpenClose = make("TagOpenClose") # del make diff --git a/src/mwparserfromhell/smart_list/list_proxy.py b/src/mwparserfromhell/smart_list/list_proxy.py index d2d89e9..5132def 100644 --- a/src/mwparserfromhell/smart_list/list_proxy.py +++ b/src/mwparserfromhell/smart_list/list_proxy.py @@ -167,7 +167,7 @@ class ListProxy(_SliceNormalizerMixIn, list): def _render(self): """Return the actual list from the stored start/stop/step.""" - return list(self._parent)[self._start:self._stop:self._step] + return list(self._parent)[self._start : self._stop : self._step] @inheritdoc def append(self, item): @@ -187,7 +187,7 @@ class ListProxy(_SliceNormalizerMixIn, list): @inheritdoc def extend(self, item): - self._parent[self._stop:self._stop] = item + self._parent[self._stop : self._stop] = item @inheritdoc def insert(self, index, item): @@ -215,7 +215,7 @@ class ListProxy(_SliceNormalizerMixIn, list): def reverse(self): item = self._render() item.reverse() - self._parent[self._start:self._stop:self._step] = item + self._parent[self._start : self._stop : self._step] = item @inheritdoc def sort(self, key=None, reverse=None): @@ -226,4 +226,4 @@ class ListProxy(_SliceNormalizerMixIn, list): if reverse is not None: kwargs["reverse"] = reverse item.sort(**kwargs) - self._parent[self._start:self._stop:self._step] = item + self._parent[self._start : self._stop : self._step] = item diff --git a/src/mwparserfromhell/string_mixin.py b/src/mwparserfromhell/string_mixin.py index 2aeabf5..a342de4 100644 --- a/src/mwparserfromhell/string_mixin.py +++ b/src/mwparserfromhell/string_mixin.py @@ -27,6 +27,7 @@ from sys import getdefaultencoding __all__ = ["StringMixIn"] + def inheritdoc(method): """Set __doc__ of *method* to __doc__ of *method* in its parent class. @@ -36,6 +37,7 @@ def inheritdoc(method): method.__doc__ = getattr(str, method.__name__).__doc__ return method + class StringMixIn: """Implement the interface for ``str`` in a dynamic manner. 
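The hunk below touches StringMixIn.__getattr__, which forwards unknown attributes to the rendered string; a minimal sketch of that delegation (illustrative input):

    import mwparserfromhell

    code = mwparserfromhell.parse("{{foo}} bar")
    print(code.upper())           # expected: {{FOO}} BAR
    print(code.startswith("{{"))  # expected: True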
@@ -92,8 +94,9 @@ class StringMixIn: def __getattr__(self, attr): if not hasattr(str, attr): - raise AttributeError("{!r} object has no attribute {!r}".format( - type(self).__name__, attr)) + raise AttributeError( + "{!r} object has no attribute {!r}".format(type(self).__name__, attr) + ) return getattr(self.__str__(), attr) maketrans = str.maketrans # Static method can't rely on __getattr__ diff --git a/src/mwparserfromhell/utils.py b/src/mwparserfromhell/utils.py index 5d262b9..0ed1d56 100644 --- a/src/mwparserfromhell/utils.py +++ b/src/mwparserfromhell/utils.py @@ -25,6 +25,7 @@ users generally won't need stuff from here. __all__ = ["parse_anything"] + def parse_anything(value, context=0, skip_style_tags=False): """Return a :class:`.Wikicode` for *value*, allowing multiple types. @@ -64,6 +65,8 @@ def parse_anything(value, context=0, skip_style_tags=False): nodelist += parse_anything(item, context, skip_style_tags).nodes return Wikicode(nodelist) except TypeError as exc: - error = ("Needs string, Node, Wikicode, file, int, None, or " - "iterable of these, but got {0}: {1}") + error = ( + "Needs string, Node, Wikicode, file, int, None, or " + "iterable of these, but got {0}: {1}" + ) raise ValueError(error.format(type(value).__name__, value)) from exc diff --git a/src/mwparserfromhell/wikicode.py b/src/mwparserfromhell/wikicode.py index bbd38a2..4d4f9b3 100644 --- a/src/mwparserfromhell/wikicode.py +++ b/src/mwparserfromhell/wikicode.py @@ -21,8 +21,18 @@ import re from itertools import chain -from .nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity, - Node, Tag, Template, Text, Wikilink) +from .nodes import ( + Argument, + Comment, + ExternalLink, + Heading, + HTMLEntity, + Node, + Tag, + Template, + Text, + Wikilink, +) from .smart_list.list_proxy import ListProxy from .string_mixin import StringMixIn from .utils import parse_anything @@ -31,6 +41,7 @@ __all__ = ["Wikicode"] FLAGS = re.IGNORECASE | re.DOTALL | re.UNICODE + class Wikicode(StringMixIn): """A ``Wikicode`` is a container for nodes that operates like a string. @@ -41,6 +52,7 @@ class Wikicode(StringMixIn): ` series of functions is very useful for extracting and iterating over, for example, all of the templates in the object. """ + RECURSE_OTHERS = 2 def __init__(self, nodes): @@ -82,8 +94,9 @@ class Wikicode(StringMixIn): return lambda obj: re.search(matches, str(obj), flags) return lambda obj: True - def _indexed_ifilter(self, recursive=True, matches=None, flags=FLAGS, - forcetype=None): + def _indexed_ifilter( + self, recursive=True, matches=None, flags=FLAGS, forcetype=None + ): """Iterate over nodes and their corresponding indices in the node list. The arguments are interpreted as for :meth:`ifilter`. For each tuple @@ -94,9 +107,11 @@ class Wikicode(StringMixIn): match = self._build_matcher(matches, flags) if recursive: restrict = forcetype if recursive == self.RECURSE_OTHERS else None + def getter(i, node): for ch in self._get_children(node, restrict=restrict): yield (i, ch) + inodes = chain(*(getter(i, n) for i, n in enumerate(self.nodes))) else: inodes = enumerate(self.nodes) @@ -106,6 +121,7 @@ class Wikicode(StringMixIn): def _is_child_wikicode(self, obj, recursive=True): """Return whether the given :class:`.Wikicode` is a descendant.""" + def deref(nodes): if isinstance(nodes, ListProxy): return nodes._parent # pylint: disable=protected-access @@ -210,6 +226,7 @@ class Wikicode(StringMixIn): should be any object that can be tested for with ``is``. *indent* is the starting indentation. 
""" + def write(*args): """Write a new line following the proper indentation rules.""" if lines and lines[-1] is marker: # Continue from the last line @@ -243,10 +260,12 @@ class Wikicode(StringMixIn): This is equivalent to :meth:`{1}` with *forcetype* set to :class:`~{2.__module__}.{2.__name__}`. """ - make_ifilter = lambda ftype: (lambda self, *a, **kw: - self.ifilter(forcetype=ftype, *a, **kw)) - make_filter = lambda ftype: (lambda self, *a, **kw: - self.filter(forcetype=ftype, *a, **kw)) + make_ifilter = lambda ftype: ( + lambda self, *a, **kw: self.ifilter(forcetype=ftype, *a, **kw) + ) + make_filter = lambda ftype: ( + lambda self, *a, **kw: self.filter(forcetype=ftype, *a, **kw) + ) for name, ftype in meths.items(): ifilt = make_ifilter(ftype) filt = make_filter(ftype) @@ -342,6 +361,7 @@ class Wikicode(StringMixIn): Will return an empty list if *obj* is at the top level of this Wikicode object. Will raise :exc:`ValueError` if it wasn't found. """ + def _get_ancestors(code, needle): for node in code.nodes: if node is needle: @@ -510,8 +530,7 @@ class Wikicode(StringMixIn): return True return False - def ifilter(self, recursive=True, matches=None, flags=FLAGS, - forcetype=None): + def ifilter(self, recursive=True, matches=None, flags=FLAGS, forcetype=None): """Iterate over nodes in our list matching certain conditions. If *forcetype* is given, only nodes that are instances of this type (or @@ -545,8 +564,15 @@ class Wikicode(StringMixIn): """ return list(self.ifilter(*args, **kwargs)) - def get_sections(self, levels=None, matches=None, flags=FLAGS, flat=False, - include_lead=None, include_headings=True): + def get_sections( + self, + levels=None, + matches=None, + flags=FLAGS, + flat=False, + include_lead=None, + include_headings=True, + ): """Return a list of sections within the page. Sections are returned as :class:`.Wikicode` objects with a shared node @@ -568,12 +594,14 @@ class Wikicode(StringMixIn): :class:`.Heading` object will be included; otherwise, this is skipped. """ title_matcher = self._build_matcher(matches, flags) - matcher = lambda heading: (title_matcher(heading.title) and - (not levels or heading.level in levels)) + matcher = lambda heading: ( + title_matcher(heading.title) and (not levels or heading.level in levels) + ) iheadings = self._indexed_ifilter(recursive=False, forcetype=Heading) sections = [] # Tuples of (index_of_first_node, section) - open_headings = [] # Tuples of (index, heading), where index and - # heading.level are both monotonically increasing + # Tuples of (index, heading), where index and heading.level are both + # monotonically increasing + open_headings = [] # Add the lead section if appropriate: if include_lead or not (include_lead is not None or matches or levels): @@ -610,8 +638,7 @@ class Wikicode(StringMixIn): # Ensure that earlier sections are earlier in the returned list: return [section for i, section in sorted(sections)] - def strip_code(self, normalize=True, collapse=True, - keep_template_params=False): + def strip_code(self, normalize=True, collapse=True, keep_template_params=False): """Return a rendered string without unprintable code such as templates. 
The way a node is stripped is handled by the @@ -631,7 +658,7 @@ class Wikicode(StringMixIn): kwargs = { "normalize": normalize, "collapse": collapse, - "keep_template_params": keep_template_params + "keep_template_params": keep_template_params, } nodes = [] @@ -673,7 +700,15 @@ class Wikicode(StringMixIn): marker = object() # Random object we can find with certainty in a list return "\n".join(self._get_tree(self, [], marker, 0)) + Wikicode._build_filter_methods( - arguments=Argument, comments=Comment, external_links=ExternalLink, - headings=Heading, html_entities=HTMLEntity, tags=Tag, templates=Template, - text=Text, wikilinks=Wikilink) + arguments=Argument, + comments=Comment, + external_links=ExternalLink, + headings=Heading, + html_entities=HTMLEntity, + tags=Tag, + templates=Template, + text=Text, + wikilinks=Wikilink, +) diff --git a/tests/conftest.py b/tests/conftest.py index 0265a7a..7d2812f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -18,14 +18,24 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from mwparserfromhell.nodes import (Argument, Comment, ExternalLink, Heading, - HTMLEntity, Tag, Template, Text, Wikilink) +from mwparserfromhell.nodes import ( + Argument, + Comment, + ExternalLink, + Heading, + HTMLEntity, + Tag, + Template, + Text, + Wikilink, +) from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode wrap = lambda L: Wikicode(SmartList(L)) wraptext = lambda *args: wrap([Text(t) for t in args]) + def _assert_node_equal(expected, actual): """Assert that two Nodes have the same type and have the same data.""" registry = { @@ -43,6 +53,7 @@ def _assert_node_equal(expected, actual): assert type(expected) == type(actual) registry[type(expected)](expected, actual) + def _assert_argument_node_equal(expected, actual): """Assert that two Argument nodes have the same data.""" assert_wikicode_equal(expected.name, actual.name) @@ -51,10 +62,12 @@ def _assert_argument_node_equal(expected, actual): else: assert actual.default is None + def _assert_comment_node_equal(expected, actual): """Assert that two Comment nodes have the same data.""" assert expected.contents == actual.contents + def _assert_external_link_node_equal(expected, actual): """Assert that two ExternalLink nodes have the same data.""" assert_wikicode_equal(expected.url, actual.url) @@ -65,11 +78,13 @@ def _assert_external_link_node_equal(expected, actual): assert expected.brackets is actual.brackets assert expected.suppress_space is actual.suppress_space + def _assert_heading_node_equal(expected, actual): """Assert that two Heading nodes have the same data.""" assert_wikicode_equal(expected.title, actual.title) assert expected.level == actual.level + def _assert_html_entity_node_equal(expected, actual): """Assert that two HTMLEntity nodes have the same data.""" assert expected.value == actual.value @@ -77,6 +92,7 @@ def _assert_html_entity_node_equal(expected, actual): assert expected.hexadecimal is actual.hexadecimal assert expected.hex_char == actual.hex_char + def _assert_tag_node_equal(expected, actual): """Assert that two Tag nodes have the same data.""" assert_wikicode_equal(expected.tag, actual.tag) @@ -105,6 +121,7 @@ def _assert_tag_node_equal(expected, actual): assert expected.padding == actual.padding assert_wikicode_equal(expected.closing_tag, actual.closing_tag) + def _assert_template_node_equal(expected, actual): """Assert that two Template nodes have the same data.""" assert_wikicode_equal(expected.name, 
actual.name) @@ -117,10 +134,12 @@ def _assert_template_node_equal(expected, actual): assert_wikicode_equal(exp_param.value, act_param.value) assert exp_param.showkey is act_param.showkey + def _assert_text_node_equal(expected, actual): """Assert that two Text nodes have the same data.""" assert expected.value == actual.value + def _assert_wikilink_node_equal(expected, actual): """Assert that two Wikilink nodes have the same data.""" assert_wikicode_equal(expected.title, actual.title) @@ -129,6 +148,7 @@ def _assert_wikilink_node_equal(expected, actual): else: assert actual.text is None + def assert_wikicode_equal(expected, actual): """Assert that two Wikicode objects have the same data.""" assert isinstance(actual, Wikicode) diff --git a/tests/test_argument.py b/tests/test_argument.py index 218e42d..437a2ac 100644 --- a/tests/test_argument.py +++ b/tests/test_argument.py @@ -27,6 +27,7 @@ import pytest from mwparserfromhell.nodes import Argument, Text from .conftest import assert_wikicode_equal, wrap, wraptext + def test_str(): """test Argument.__str__()""" node = Argument(wraptext("foobar")) @@ -34,6 +35,7 @@ def test_str(): node2 = Argument(wraptext("foo"), wraptext("bar")) assert "{{{foo|bar}}}" == str(node2) + def test_children(): """test Argument.__children__()""" node1 = Argument(wraptext("foobar")) @@ -48,6 +50,7 @@ def test_children(): with pytest.raises(StopIteration): next(gen2) + def test_strip(): """test Argument.__strip__()""" node1 = Argument(wraptext("foobar")) @@ -55,6 +58,7 @@ def test_strip(): assert node1.__strip__() is None assert "bar" == node2.__strip__() + def test_showtree(): """test Argument.__showtree__()""" output = [] @@ -66,10 +70,19 @@ def test_showtree(): node1.__showtree__(output.append, get, mark) node2.__showtree__(output.append, get, mark) valid = [ - "{{{", (getter, node1.name), "}}}", "{{{", (getter, node2.name), - " | ", marker, (getter, node2.default), "}}}"] + "{{{", + (getter, node1.name), + "}}}", + "{{{", + (getter, node2.name), + " | ", + marker, + (getter, node2.default), + "}}}", + ] assert valid == output + def test_name(): """test getter/setter for the name attribute""" name = wraptext("foobar") @@ -82,6 +95,7 @@ def test_name(): assert_wikicode_equal(wraptext("héhehé"), node1.name) assert_wikicode_equal(wraptext("héhehé"), node2.name) + def test_default(): """test getter/setter for the default attribute""" default = wraptext("baz") diff --git a/tests/test_attribute.py b/tests/test_attribute.py index 7d845eb..0d67f2a 100644 --- a/tests/test_attribute.py +++ b/tests/test_attribute.py @@ -28,6 +28,7 @@ from mwparserfromhell.nodes import Template from mwparserfromhell.nodes.extras import Attribute from .conftest import assert_wikicode_equal, wrap, wraptext + def test_str(): """test Attribute.__str__()""" node = Attribute(wraptext("foo")) @@ -43,6 +44,7 @@ def test_str(): node6 = Attribute(wraptext("a"), wrap([]), None, " ", "", " ") assert " a= " == str(node6) + def test_name(): """test getter/setter for the name attribute""" name = wraptext("id") @@ -51,6 +53,7 @@ def test_name(): node.name = "{{id}}" assert_wikicode_equal(wrap([Template(wraptext("id"))]), node.name) + def test_value(): """test getter/setter for the value attribute""" value = wraptext("foo") @@ -74,6 +77,7 @@ def test_value(): assert_wikicode_equal(wraptext("fo\"o 'bar' b\"az"), node2.value) assert '"' == node2.quotes + def test_quotes(): """test getter/setter for the quotes attribute""" node1 = Attribute(wraptext("id"), wraptext("foo"), None) @@ -92,6 +96,7 @@ def 
test_quotes(): with pytest.raises(ValueError): Attribute(wraptext("id"), wraptext("foo bar baz"), None) + def test_padding(): """test getter/setter for the padding attributes""" for pad in ["pad_first", "pad_before_eq", "pad_after_eq"]: diff --git a/tests/test_builder.py b/tests/test_builder.py index 763763a..2a4eb4a 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -24,403 +24,814 @@ Tests for the builder, which turns tokens into Wikicode objects. import pytest -from mwparserfromhell.nodes import (Argument, Comment, ExternalLink, Heading, - HTMLEntity, Tag, Template, Text, Wikilink) +from mwparserfromhell.nodes import ( + Argument, + Comment, + ExternalLink, + Heading, + HTMLEntity, + Tag, + Template, + Text, + Wikilink, +) from mwparserfromhell.nodes.extras import Attribute, Parameter from mwparserfromhell.parser import tokens, ParserError from mwparserfromhell.parser.builder import Builder from .conftest import assert_wikicode_equal, wrap, wraptext + @pytest.fixture() def builder(): return Builder() -@pytest.mark.parametrize("test,valid", [ - ([tokens.Text(text="foobar")], wraptext("foobar")), - ([tokens.Text(text="fóóbar")], wraptext("fóóbar")), - ([tokens.Text(text="spam"), tokens.Text(text="eggs")], - wraptext("spam", "eggs")), -]) + +@pytest.mark.parametrize( + "test,valid", + [ + ([tokens.Text(text="foobar")], wraptext("foobar")), + ([tokens.Text(text="fóóbar")], wraptext("fóóbar")), + ( + [tokens.Text(text="spam"), tokens.Text(text="eggs")], + wraptext("spam", "eggs"), + ), + ], +) def test_text(builder, test, valid): """tests for building Text nodes""" assert_wikicode_equal(valid, builder.build(test)) -@pytest.mark.parametrize("test,valid", [ - ([tokens.TemplateOpen(), tokens.Text(text="foobar"), - tokens.TemplateClose()], - wrap([Template(wraptext("foobar"))])), - - ([tokens.TemplateOpen(), tokens.Text(text="spam"), - tokens.Text(text="eggs"), tokens.TemplateClose()], - wrap([Template(wraptext("spam", "eggs"))])), - - ([tokens.TemplateOpen(), tokens.Text(text="foo"), - tokens.TemplateParamSeparator(), tokens.Text(text="bar"), - tokens.TemplateClose()], - wrap([Template(wraptext("foo"), params=[ - Parameter(wraptext("1"), wraptext("bar"), showkey=False)])])), - ([tokens.TemplateOpen(), tokens.Text(text="foo"), - tokens.TemplateParamSeparator(), tokens.Text(text="bar"), - tokens.TemplateParamEquals(), tokens.Text(text="baz"), - tokens.TemplateClose()], - wrap([Template(wraptext("foo"), params=[ - Parameter(wraptext("bar"), wraptext("baz"))])])), - - ([tokens.TemplateOpen(), tokens.TemplateParamSeparator(), - tokens.TemplateParamSeparator(), tokens.TemplateParamEquals(), - tokens.TemplateParamSeparator(), tokens.TemplateClose()], - wrap([Template(wrap([]), params=[ - Parameter(wraptext("1"), wrap([]), showkey=False), - Parameter(wrap([]), wrap([]), showkey=True), - Parameter(wraptext("2"), wrap([]), showkey=False)])])), - - ([tokens.TemplateOpen(), tokens.Text(text="foo"), - tokens.TemplateParamSeparator(), tokens.Text(text="bar"), - tokens.TemplateParamEquals(), tokens.Text(text="baz"), - tokens.TemplateParamSeparator(), tokens.Text(text="biz"), - tokens.TemplateParamSeparator(), tokens.Text(text="buzz"), - tokens.TemplateParamSeparator(), tokens.Text(text="3"), - tokens.TemplateParamEquals(), tokens.Text(text="buff"), - tokens.TemplateParamSeparator(), tokens.Text(text="baff"), - tokens.TemplateClose()], - wrap([Template(wraptext("foo"), params=[ - Parameter(wraptext("bar"), wraptext("baz")), - Parameter(wraptext("1"), wraptext("biz"), showkey=False), - 
Parameter(wraptext("2"), wraptext("buzz"), showkey=False), - Parameter(wraptext("3"), wraptext("buff")), - Parameter(wraptext("3"), wraptext("baff"), - showkey=False)])])), -]) +@pytest.mark.parametrize( + "test,valid", + [ + ( + [tokens.TemplateOpen(), tokens.Text(text="foobar"), tokens.TemplateClose()], + wrap([Template(wraptext("foobar"))]), + ), + ( + [ + tokens.TemplateOpen(), + tokens.Text(text="spam"), + tokens.Text(text="eggs"), + tokens.TemplateClose(), + ], + wrap([Template(wraptext("spam", "eggs"))]), + ), + ( + [ + tokens.TemplateOpen(), + tokens.Text(text="foo"), + tokens.TemplateParamSeparator(), + tokens.Text(text="bar"), + tokens.TemplateClose(), + ], + wrap( + [ + Template( + wraptext("foo"), + params=[ + Parameter(wraptext("1"), wraptext("bar"), showkey=False) + ], + ) + ] + ), + ), + ( + [ + tokens.TemplateOpen(), + tokens.Text(text="foo"), + tokens.TemplateParamSeparator(), + tokens.Text(text="bar"), + tokens.TemplateParamEquals(), + tokens.Text(text="baz"), + tokens.TemplateClose(), + ], + wrap( + [ + Template( + wraptext("foo"), + params=[Parameter(wraptext("bar"), wraptext("baz"))], + ) + ] + ), + ), + ( + [ + tokens.TemplateOpen(), + tokens.TemplateParamSeparator(), + tokens.TemplateParamSeparator(), + tokens.TemplateParamEquals(), + tokens.TemplateParamSeparator(), + tokens.TemplateClose(), + ], + wrap( + [ + Template( + wrap([]), + params=[ + Parameter(wraptext("1"), wrap([]), showkey=False), + Parameter(wrap([]), wrap([]), showkey=True), + Parameter(wraptext("2"), wrap([]), showkey=False), + ], + ) + ] + ), + ), + ( + [ + tokens.TemplateOpen(), + tokens.Text(text="foo"), + tokens.TemplateParamSeparator(), + tokens.Text(text="bar"), + tokens.TemplateParamEquals(), + tokens.Text(text="baz"), + tokens.TemplateParamSeparator(), + tokens.Text(text="biz"), + tokens.TemplateParamSeparator(), + tokens.Text(text="buzz"), + tokens.TemplateParamSeparator(), + tokens.Text(text="3"), + tokens.TemplateParamEquals(), + tokens.Text(text="buff"), + tokens.TemplateParamSeparator(), + tokens.Text(text="baff"), + tokens.TemplateClose(), + ], + wrap( + [ + Template( + wraptext("foo"), + params=[ + Parameter(wraptext("bar"), wraptext("baz")), + Parameter(wraptext("1"), wraptext("biz"), showkey=False), + Parameter(wraptext("2"), wraptext("buzz"), showkey=False), + Parameter(wraptext("3"), wraptext("buff")), + Parameter(wraptext("3"), wraptext("baff"), showkey=False), + ], + ) + ] + ), + ), + ], +) def test_template(builder, test, valid): """tests for building Template nodes""" assert_wikicode_equal(valid, builder.build(test)) -@pytest.mark.parametrize("test,valid", [ - ([tokens.ArgumentOpen(), tokens.Text(text="foobar"), - tokens.ArgumentClose()], - wrap([Argument(wraptext("foobar"))])), - - ([tokens.ArgumentOpen(), tokens.Text(text="spam"), - tokens.Text(text="eggs"), tokens.ArgumentClose()], - wrap([Argument(wraptext("spam", "eggs"))])), - ([tokens.ArgumentOpen(), tokens.Text(text="foo"), - tokens.ArgumentSeparator(), tokens.Text(text="bar"), - tokens.ArgumentClose()], - wrap([Argument(wraptext("foo"), wraptext("bar"))])), - - ([tokens.ArgumentOpen(), tokens.Text(text="foo"), - tokens.Text(text="bar"), tokens.ArgumentSeparator(), - tokens.Text(text="baz"), tokens.Text(text="biz"), - tokens.ArgumentClose()], - wrap([Argument(wraptext("foo", "bar"), wraptext("baz", "biz"))])), -]) +@pytest.mark.parametrize( + "test,valid", + [ + ( + [tokens.ArgumentOpen(), tokens.Text(text="foobar"), tokens.ArgumentClose()], + wrap([Argument(wraptext("foobar"))]), + ), + ( + [ + tokens.ArgumentOpen(), 
+ tokens.Text(text="spam"), + tokens.Text(text="eggs"), + tokens.ArgumentClose(), + ], + wrap([Argument(wraptext("spam", "eggs"))]), + ), + ( + [ + tokens.ArgumentOpen(), + tokens.Text(text="foo"), + tokens.ArgumentSeparator(), + tokens.Text(text="bar"), + tokens.ArgumentClose(), + ], + wrap([Argument(wraptext("foo"), wraptext("bar"))]), + ), + ( + [ + tokens.ArgumentOpen(), + tokens.Text(text="foo"), + tokens.Text(text="bar"), + tokens.ArgumentSeparator(), + tokens.Text(text="baz"), + tokens.Text(text="biz"), + tokens.ArgumentClose(), + ], + wrap([Argument(wraptext("foo", "bar"), wraptext("baz", "biz"))]), + ), + ], +) def test_argument(builder, test, valid): """tests for building Argument nodes""" assert_wikicode_equal(valid, builder.build(test)) -@pytest.mark.parametrize("test,valid", [ - ([tokens.WikilinkOpen(), tokens.Text(text="foobar"), - tokens.WikilinkClose()], - wrap([Wikilink(wraptext("foobar"))])), - - ([tokens.WikilinkOpen(), tokens.Text(text="spam"), - tokens.Text(text="eggs"), tokens.WikilinkClose()], - wrap([Wikilink(wraptext("spam", "eggs"))])), - ([tokens.WikilinkOpen(), tokens.Text(text="foo"), - tokens.WikilinkSeparator(), tokens.Text(text="bar"), - tokens.WikilinkClose()], - wrap([Wikilink(wraptext("foo"), wraptext("bar"))])), - - ([tokens.WikilinkOpen(), tokens.Text(text="foo"), - tokens.Text(text="bar"), tokens.WikilinkSeparator(), - tokens.Text(text="baz"), tokens.Text(text="biz"), - tokens.WikilinkClose()], - wrap([Wikilink(wraptext("foo", "bar"), wraptext("baz", "biz"))])), -]) +@pytest.mark.parametrize( + "test,valid", + [ + ( + [tokens.WikilinkOpen(), tokens.Text(text="foobar"), tokens.WikilinkClose()], + wrap([Wikilink(wraptext("foobar"))]), + ), + ( + [ + tokens.WikilinkOpen(), + tokens.Text(text="spam"), + tokens.Text(text="eggs"), + tokens.WikilinkClose(), + ], + wrap([Wikilink(wraptext("spam", "eggs"))]), + ), + ( + [ + tokens.WikilinkOpen(), + tokens.Text(text="foo"), + tokens.WikilinkSeparator(), + tokens.Text(text="bar"), + tokens.WikilinkClose(), + ], + wrap([Wikilink(wraptext("foo"), wraptext("bar"))]), + ), + ( + [ + tokens.WikilinkOpen(), + tokens.Text(text="foo"), + tokens.Text(text="bar"), + tokens.WikilinkSeparator(), + tokens.Text(text="baz"), + tokens.Text(text="biz"), + tokens.WikilinkClose(), + ], + wrap([Wikilink(wraptext("foo", "bar"), wraptext("baz", "biz"))]), + ), + ], +) def test_wikilink(builder, test, valid): """tests for building Wikilink nodes""" assert_wikicode_equal(valid, builder.build(test)) -@pytest.mark.parametrize("test,valid", [ - ([tokens.ExternalLinkOpen(brackets=False), - tokens.Text(text="http://example.com/"), - tokens.ExternalLinkClose()], - wrap([ExternalLink(wraptext("http://example.com/"), - brackets=False)])), - - ([tokens.ExternalLinkOpen(brackets=True), - tokens.Text(text="http://example.com/"), - tokens.ExternalLinkClose()], - wrap([ExternalLink(wraptext("http://example.com/"))])), - - ([tokens.ExternalLinkOpen(brackets=True), - tokens.Text(text="http://example.com/"), - tokens.ExternalLinkSeparator(), tokens.ExternalLinkClose()], - wrap([ExternalLink(wraptext("http://example.com/"), wrap([]))])), - - ([tokens.ExternalLinkOpen(brackets=True), - tokens.Text(text="http://example.com/"), - tokens.ExternalLinkSeparator(), tokens.Text(text="Example"), - tokens.ExternalLinkClose()], - wrap([ExternalLink(wraptext("http://example.com/"), - wraptext("Example"))])), - - ([tokens.ExternalLinkOpen(brackets=False), - tokens.Text(text="http://example"), tokens.Text(text=".com/foo"), - tokens.ExternalLinkClose()], - 
wrap([ExternalLink(wraptext("http://example", ".com/foo"), - brackets=False)])), - ([tokens.ExternalLinkOpen(brackets=True), - tokens.Text(text="http://example"), tokens.Text(text=".com/foo"), - tokens.ExternalLinkSeparator(), tokens.Text(text="Example"), - tokens.Text(text=" Web Page"), tokens.ExternalLinkClose()], - wrap([ExternalLink(wraptext("http://example", ".com/foo"), - wraptext("Example", " Web Page"))])), -]) +@pytest.mark.parametrize( + "test,valid", + [ + ( + [ + tokens.ExternalLinkOpen(brackets=False), + tokens.Text(text="http://example.com/"), + tokens.ExternalLinkClose(), + ], + wrap([ExternalLink(wraptext("http://example.com/"), brackets=False)]), + ), + ( + [ + tokens.ExternalLinkOpen(brackets=True), + tokens.Text(text="http://example.com/"), + tokens.ExternalLinkClose(), + ], + wrap([ExternalLink(wraptext("http://example.com/"))]), + ), + ( + [ + tokens.ExternalLinkOpen(brackets=True), + tokens.Text(text="http://example.com/"), + tokens.ExternalLinkSeparator(), + tokens.ExternalLinkClose(), + ], + wrap([ExternalLink(wraptext("http://example.com/"), wrap([]))]), + ), + ( + [ + tokens.ExternalLinkOpen(brackets=True), + tokens.Text(text="http://example.com/"), + tokens.ExternalLinkSeparator(), + tokens.Text(text="Example"), + tokens.ExternalLinkClose(), + ], + wrap([ExternalLink(wraptext("http://example.com/"), wraptext("Example"))]), + ), + ( + [ + tokens.ExternalLinkOpen(brackets=False), + tokens.Text(text="http://example"), + tokens.Text(text=".com/foo"), + tokens.ExternalLinkClose(), + ], + wrap( + [ExternalLink(wraptext("http://example", ".com/foo"), brackets=False)] + ), + ), + ( + [ + tokens.ExternalLinkOpen(brackets=True), + tokens.Text(text="http://example"), + tokens.Text(text=".com/foo"), + tokens.ExternalLinkSeparator(), + tokens.Text(text="Example"), + tokens.Text(text=" Web Page"), + tokens.ExternalLinkClose(), + ], + wrap( + [ + ExternalLink( + wraptext("http://example", ".com/foo"), + wraptext("Example", " Web Page"), + ) + ] + ), + ), + ], +) def test_external_link(builder, test, valid): """tests for building ExternalLink nodes""" assert_wikicode_equal(valid, builder.build(test)) -@pytest.mark.parametrize("test,valid", [ - ([tokens.HTMLEntityStart(), tokens.Text(text="nbsp"), - tokens.HTMLEntityEnd()], - wrap([HTMLEntity("nbsp", named=True, hexadecimal=False)])), - ([tokens.HTMLEntityStart(), tokens.HTMLEntityNumeric(), - tokens.Text(text="107"), tokens.HTMLEntityEnd()], - wrap([HTMLEntity("107", named=False, hexadecimal=False)])), - - ([tokens.HTMLEntityStart(), tokens.HTMLEntityNumeric(), - tokens.HTMLEntityHex(char="X"), tokens.Text(text="6B"), - tokens.HTMLEntityEnd()], - wrap([HTMLEntity("6B", named=False, hexadecimal=True, - hex_char="X")])), -]) +@pytest.mark.parametrize( + "test,valid", + [ + ( + [ + tokens.HTMLEntityStart(), + tokens.Text(text="nbsp"), + tokens.HTMLEntityEnd(), + ], + wrap([HTMLEntity("nbsp", named=True, hexadecimal=False)]), + ), + ( + [ + tokens.HTMLEntityStart(), + tokens.HTMLEntityNumeric(), + tokens.Text(text="107"), + tokens.HTMLEntityEnd(), + ], + wrap([HTMLEntity("107", named=False, hexadecimal=False)]), + ), + ( + [ + tokens.HTMLEntityStart(), + tokens.HTMLEntityNumeric(), + tokens.HTMLEntityHex(char="X"), + tokens.Text(text="6B"), + tokens.HTMLEntityEnd(), + ], + wrap([HTMLEntity("6B", named=False, hexadecimal=True, hex_char="X")]), + ), + ], +) def test_html_entity(builder, test, valid): """tests for building HTMLEntity nodes""" assert_wikicode_equal(valid, builder.build(test)) -@pytest.mark.parametrize("test,valid", [ - 
([tokens.HeadingStart(level=2), tokens.Text(text="foobar"),
- tokens.HeadingEnd()],
- wrap([Heading(wraptext("foobar"), 2)])),
- ([tokens.HeadingStart(level=4), tokens.Text(text="spam"),
- tokens.Text(text="eggs"), tokens.HeadingEnd()],
- wrap([Heading(wraptext("spam", "eggs"), 4)])),
-])
+@pytest.mark.parametrize(
+ "test,valid",
+ [
+ (
+ [
+ tokens.HeadingStart(level=2),
+ tokens.Text(text="foobar"),
+ tokens.HeadingEnd(),
+ ],
+ wrap([Heading(wraptext("foobar"), 2)]),
+ ),
+ (
+ [
+ tokens.HeadingStart(level=4),
+ tokens.Text(text="spam"),
+ tokens.Text(text="eggs"),
+ tokens.HeadingEnd(),
+ ],
+ wrap([Heading(wraptext("spam", "eggs"), 4)]),
+ ),
+ ],
+)
def test_heading(builder, test, valid):
"""tests for building Heading nodes"""
assert_wikicode_equal(valid, builder.build(test))
-@pytest.mark.parametrize("test,valid", [
- ([tokens.CommentStart(), tokens.Text(text="foobar"),
- tokens.CommentEnd()],
- wrap([Comment("foobar")])),
- ([tokens.CommentStart(), tokens.Text(text="spam"),
- tokens.Text(text="eggs"), tokens.CommentEnd()],
- wrap([Comment("spameggs")])),
-])
+@pytest.mark.parametrize(
+ "test,valid",
+ [
+ (
+ [tokens.CommentStart(), tokens.Text(text="foobar"), tokens.CommentEnd()],
+ wrap([Comment("foobar")]),
+ ),
+ (
+ [
+ tokens.CommentStart(),
+ tokens.Text(text="spam"),
+ tokens.Text(text="eggs"),
+ tokens.CommentEnd(),
+ ],
+ wrap([Comment("spameggs")]),
+ ),
+ ],
+)
def test_comment(builder, test, valid):
"""tests for building Comment nodes"""
assert_wikicode_equal(valid, builder.build(test))
-@pytest.mark.parametrize("test,valid", [
- # <ref></ref>
- ([tokens.TagOpenOpen(), tokens.Text(text="ref"),
- tokens.TagCloseOpen(padding=""), tokens.TagOpenClose(),
- tokens.Text(text="ref"), tokens.TagCloseClose()],
- wrap([Tag(wraptext("ref"), wrap([]),
- closing_tag=wraptext("ref"))])),
-
- # <ref name></ref>
- ([tokens.TagOpenOpen(), tokens.Text(text="ref"),
- tokens.TagAttrStart(pad_first=" ", pad_before_eq="",
- pad_after_eq=""),
- tokens.Text(text="name"), tokens.TagCloseOpen(padding=""),
- tokens.TagOpenClose(), tokens.Text(text="ref"),
- tokens.TagCloseClose()],
- wrap([Tag(wraptext("ref"), wrap([]),
- attrs=[Attribute(wraptext("name"))])])),
-
- # <ref name="abc" />
- ([tokens.TagOpenOpen(), tokens.Text(text="ref"),
- tokens.TagAttrStart(pad_first=" ", pad_before_eq="",
- pad_after_eq=""),
- tokens.Text(text="name"), tokens.TagAttrEquals(),
- tokens.TagAttrQuote(char='"'), tokens.Text(text="abc"),
- tokens.TagCloseSelfclose(padding=" ")],
- wrap([Tag(wraptext("ref"),
- attrs=[Attribute(wraptext("name"), wraptext("abc"))],
- self_closing=True, padding=" ")])),
-
- # <br/>
- ([tokens.TagOpenOpen(), tokens.Text(text="br"),
- tokens.TagCloseSelfclose(padding="")],
- wrap([Tag(wraptext("br"), self_closing=True)])),
-
- # <li>
- ([tokens.TagOpenOpen(), tokens.Text(text="li"),
- tokens.TagCloseSelfclose(padding="", implicit=True)],
- wrap([Tag(wraptext("li"), self_closing=True, implicit=True)])),
-
- # </br>
- ([tokens.TagOpenOpen(invalid=True), tokens.Text(text="br"),
- tokens.TagCloseSelfclose(padding="", implicit=True)],
- wrap([Tag(wraptext("br"), self_closing=True, invalid=True,
- implicit=True)])),
-
- # </br/>
- ([tokens.TagOpenOpen(invalid=True), tokens.Text(text="br"),
- tokens.TagCloseSelfclose(padding="")],
- wrap([Tag(wraptext("br"), self_closing=True, invalid=True)])),
- # <ref name={{abc}} foo="bar {{baz}}" abc={{de}}f ghi=j{{k}}{{l}} mno = '{{p}} [[q]] {{r}}'>[[Source]]</ref>
- ([tokens.TagOpenOpen(), tokens.Text(text="ref"),
- tokens.TagAttrStart(pad_first=" ", pad_before_eq="",
- pad_after_eq=""),
- tokens.Text(text="name"), tokens.TagAttrEquals(),
- tokens.TemplateOpen(), tokens.Text(text="abc"),
- tokens.TemplateClose(),
- tokens.TagAttrStart(pad_first=" ", pad_before_eq="",
- pad_after_eq=""),
- tokens.Text(text="foo"), tokens.TagAttrEquals(),
- tokens.TagAttrQuote(char='"'), tokens.Text(text="bar "),
- tokens.TemplateOpen(), tokens.Text(text="baz"),
- tokens.TemplateClose(),
- tokens.TagAttrStart(pad_first=" ", pad_before_eq="",
- pad_after_eq=""),
- tokens.Text(text="abc"), tokens.TagAttrEquals(),
- tokens.TemplateOpen(), tokens.Text(text="de"),
- tokens.TemplateClose(), tokens.Text(text="f"),
- tokens.TagAttrStart(pad_first=" ", pad_before_eq="",
- pad_after_eq=""),
- tokens.Text(text="ghi"), tokens.TagAttrEquals(),
- tokens.Text(text="j"), tokens.TemplateOpen(),
- tokens.Text(text="k"), tokens.TemplateClose(),
- tokens.TemplateOpen(), tokens.Text(text="l"),
- tokens.TemplateClose(),
- tokens.TagAttrStart(pad_first=" \n ", pad_before_eq=" ",
- pad_after_eq=" "),
- tokens.Text(text="mno"), tokens.TagAttrEquals(),
- tokens.TagAttrQuote(char="'"), tokens.TemplateOpen(),
- tokens.Text(text="p"), tokens.TemplateClose(),
- tokens.Text(text=" "), tokens.WikilinkOpen(),
- tokens.Text(text="q"), tokens.WikilinkClose(),
- tokens.Text(text=" "), tokens.TemplateOpen(),
- tokens.Text(text="r"), tokens.TemplateClose(),
- tokens.TagCloseOpen(padding=""), tokens.WikilinkOpen(),
- tokens.Text(text="Source"), tokens.WikilinkClose(),
- tokens.TagOpenClose(), tokens.Text(text="ref"),
- tokens.TagCloseClose()],
- wrap([Tag(wraptext("ref"), wrap([Wikilink(wraptext("Source"))]), [
- Attribute(wraptext("name"),
- wrap([Template(wraptext("abc"))]), None),
- Attribute(wraptext("foo"), wrap([Text("bar "),
- Template(wraptext("baz"))]), pad_first=" "),
- Attribute(wraptext("abc"), wrap([Template(wraptext("de")),
- Text("f")]), None),
- Attribute(wraptext("ghi"), wrap([Text("j"),
- Template(wraptext("k")),
- Template(wraptext("l"))]), None),
- Attribute(wraptext("mno"), wrap([Template(wraptext("p")),
- Text(" "), Wikilink(wraptext("q")), Text(" "),
- Template(wraptext("r"))]), "'", " \n ", " ",
- " ")])])),
-
- # "''italic text''"
- ([tokens.TagOpenOpen(wiki_markup="''"), tokens.Text(text="i"),
- tokens.TagCloseOpen(), tokens.Text(text="italic text"),
- tokens.TagOpenClose(), tokens.Text(text="i"),
- tokens.TagCloseClose()],
- wrap([Tag(wraptext("i"), wraptext("italic text"),
- wiki_markup="''")])),
-
- # * bullet
- ([tokens.TagOpenOpen(wiki_markup="*"), tokens.Text(text="li"),
- tokens.TagCloseSelfclose(), tokens.Text(text=" bullet")],
- wrap([Tag(wraptext("li"), wiki_markup="*", self_closing=True),
- Text(" bullet")])),
-])
+@pytest.mark.parametrize(
+ "test,valid",
+ [
+ # <ref></ref>
+ (
+ [
+ tokens.TagOpenOpen(),
+ tokens.Text(text="ref"),
+ tokens.TagCloseOpen(padding=""),
+ tokens.TagOpenClose(),
+ tokens.Text(text="ref"),
+ tokens.TagCloseClose(),
+ ],
+ wrap([Tag(wraptext("ref"), wrap([]), closing_tag=wraptext("ref"))]),
+ ),
+ # <ref name></ref>
+ (
+ [
+ tokens.TagOpenOpen(),
+ tokens.Text(text="ref"),
+ tokens.TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""),
+ tokens.Text(text="name"),
+ tokens.TagCloseOpen(padding=""),
+ tokens.TagOpenClose(),
+ tokens.Text(text="ref"),
+ tokens.TagCloseClose(),
],
+            wrap([Tag(wraptext("ref"), wrap([]), attrs=[Attribute(wraptext("name"))])]),
+        ),
+        # <ref name="abc" />
+        (
+            [
+                tokens.TagOpenOpen(),
+                tokens.Text(text="ref"),
+                tokens.TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""),
+                tokens.Text(text="name"),
+                tokens.TagAttrEquals(),
+                tokens.TagAttrQuote(char='"'),
+                tokens.Text(text="abc"),
+                tokens.TagCloseSelfclose(padding=" "),
+            ],
+            wrap(
+                [
+                    Tag(
+                        wraptext("ref"),
+                        attrs=[Attribute(wraptext("name"), wraptext("abc"))],
+                        self_closing=True,
+                        padding=" ",
+                    )
+                ]
+            ),
+        ),
+        # <br/>
+        (
+            [
+                tokens.TagOpenOpen(),
+                tokens.Text(text="br"),
+                tokens.TagCloseSelfclose(padding=""),
+            ],
+            wrap([Tag(wraptext("br"), self_closing=True)]),
+        ),
+        # <li>
+        (
+            [
+                tokens.TagOpenOpen(),
+                tokens.Text(text="li"),
+                tokens.TagCloseSelfclose(padding="", implicit=True),
+            ],
+            wrap([Tag(wraptext("li"), self_closing=True, implicit=True)]),
+        ),
+        # </br>
+        (
+            [
+                tokens.TagOpenOpen(invalid=True),
+                tokens.Text(text="br"),
+                tokens.TagCloseSelfclose(padding="", implicit=True),
+            ],
+            wrap([Tag(wraptext("br"), self_closing=True, invalid=True, implicit=True)]),
+        ),
+        # </br/>
    + ( + [ + tokens.TagOpenOpen(invalid=True), + tokens.Text(text="br"), + tokens.TagCloseSelfclose(padding=""), + ], + wrap([Tag(wraptext("br"), self_closing=True, invalid=True)]), + ), + # [[Source]] + ( + [ + tokens.TagOpenOpen(), + tokens.Text(text="ref"), + tokens.TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), + tokens.Text(text="name"), + tokens.TagAttrEquals(), + tokens.TemplateOpen(), + tokens.Text(text="abc"), + tokens.TemplateClose(), + tokens.TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), + tokens.Text(text="foo"), + tokens.TagAttrEquals(), + tokens.TagAttrQuote(char='"'), + tokens.Text(text="bar "), + tokens.TemplateOpen(), + tokens.Text(text="baz"), + tokens.TemplateClose(), + tokens.TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), + tokens.Text(text="abc"), + tokens.TagAttrEquals(), + tokens.TemplateOpen(), + tokens.Text(text="de"), + tokens.TemplateClose(), + tokens.Text(text="f"), + tokens.TagAttrStart(pad_first=" ", pad_before_eq="", pad_after_eq=""), + tokens.Text(text="ghi"), + tokens.TagAttrEquals(), + tokens.Text(text="j"), + tokens.TemplateOpen(), + tokens.Text(text="k"), + tokens.TemplateClose(), + tokens.TemplateOpen(), + tokens.Text(text="l"), + tokens.TemplateClose(), + tokens.TagAttrStart( + pad_first=" \n ", pad_before_eq=" ", pad_after_eq=" " + ), + tokens.Text(text="mno"), + tokens.TagAttrEquals(), + tokens.TagAttrQuote(char="'"), + tokens.TemplateOpen(), + tokens.Text(text="p"), + tokens.TemplateClose(), + tokens.Text(text=" "), + tokens.WikilinkOpen(), + tokens.Text(text="q"), + tokens.WikilinkClose(), + tokens.Text(text=" "), + tokens.TemplateOpen(), + tokens.Text(text="r"), + tokens.TemplateClose(), + tokens.TagCloseOpen(padding=""), + tokens.WikilinkOpen(), + tokens.Text(text="Source"), + tokens.WikilinkClose(), + tokens.TagOpenClose(), + tokens.Text(text="ref"), + tokens.TagCloseClose(), + ], + wrap( + [ + Tag( + wraptext("ref"), + wrap([Wikilink(wraptext("Source"))]), + [ + Attribute( + wraptext("name"), + wrap([Template(wraptext("abc"))]), + None, + ), + Attribute( + wraptext("foo"), + wrap([Text("bar "), Template(wraptext("baz"))]), + pad_first=" ", + ), + Attribute( + wraptext("abc"), + wrap([Template(wraptext("de")), Text("f")]), + None, + ), + Attribute( + wraptext("ghi"), + wrap( + [ + Text("j"), + Template(wraptext("k")), + Template(wraptext("l")), + ] + ), + None, + ), + Attribute( + wraptext("mno"), + wrap( + [ + Template(wraptext("p")), + Text(" "), + Wikilink(wraptext("q")), + Text(" "), + Template(wraptext("r")), + ] + ), + "'", + " \n ", + " ", + " ", + ), + ], + ) + ] + ), + ), + # "''italic text''" + ( + [ + tokens.TagOpenOpen(wiki_markup="''"), + tokens.Text(text="i"), + tokens.TagCloseOpen(), + tokens.Text(text="italic text"), + tokens.TagOpenClose(), + tokens.Text(text="i"), + tokens.TagCloseClose(), + ], + wrap([Tag(wraptext("i"), wraptext("italic text"), wiki_markup="''")]), + ), + # * bullet + ( + [ + tokens.TagOpenOpen(wiki_markup="*"), + tokens.Text(text="li"), + tokens.TagCloseSelfclose(), + tokens.Text(text=" bullet"), + ], + wrap( + [ + Tag(wraptext("li"), wiki_markup="*", self_closing=True), + Text(" bullet"), + ] + ), + ), + ], +) def test_tag(builder, test, valid): """tests for building Tag nodes""" assert_wikicode_equal(valid, builder.build(test)) + def test_integration(builder): """a test for building a combination of templates together""" # {{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}} - test = [tokens.TemplateOpen(), tokens.TemplateOpen(), - tokens.TemplateOpen(), 
tokens.TemplateOpen(), - tokens.Text(text="foo"), tokens.TemplateClose(), - tokens.Text(text="bar"), tokens.TemplateParamSeparator(), - tokens.Text(text="baz"), tokens.TemplateParamEquals(), - tokens.Text(text="biz"), tokens.TemplateClose(), - tokens.Text(text="buzz"), tokens.TemplateClose(), - tokens.Text(text="usr"), tokens.TemplateParamSeparator(), - tokens.TemplateOpen(), tokens.Text(text="bin"), - tokens.TemplateClose(), tokens.TemplateClose()] + test = [ + tokens.TemplateOpen(), + tokens.TemplateOpen(), + tokens.TemplateOpen(), + tokens.TemplateOpen(), + tokens.Text(text="foo"), + tokens.TemplateClose(), + tokens.Text(text="bar"), + tokens.TemplateParamSeparator(), + tokens.Text(text="baz"), + tokens.TemplateParamEquals(), + tokens.Text(text="biz"), + tokens.TemplateClose(), + tokens.Text(text="buzz"), + tokens.TemplateClose(), + tokens.Text(text="usr"), + tokens.TemplateParamSeparator(), + tokens.TemplateOpen(), + tokens.Text(text="bin"), + tokens.TemplateClose(), + tokens.TemplateClose(), + ] valid = wrap( - [Template(wrap([Template(wrap([Template(wrap([Template(wraptext( - "foo")), Text("bar")]), params=[Parameter(wraptext("baz"), - wraptext("biz"))]), Text("buzz")])), Text("usr")]), params=[ - Parameter(wraptext("1"), wrap([Template(wraptext("bin"))]), - showkey=False)])]) + [ + Template( + wrap( + [ + Template( + wrap( + [ + Template( + wrap([Template(wraptext("foo")), Text("bar")]), + params=[ + Parameter(wraptext("baz"), wraptext("biz")) + ], + ), + Text("buzz"), + ] + ) + ), + Text("usr"), + ] + ), + params=[ + Parameter( + wraptext("1"), wrap([Template(wraptext("bin"))]), showkey=False + ) + ], + ) + ] + ) assert_wikicode_equal(valid, builder.build(test)) + def test_integration2(builder): """an even more audacious test for building a horrible wikicode mess""" # {{a|b|{{c|[[d]]{{{e}}}}}}}[[f|{{{g}}}]]{{i|j= }} - test = [tokens.TemplateOpen(), tokens.Text(text="a"), - tokens.TemplateParamSeparator(), tokens.Text(text="b"), - tokens.TemplateParamSeparator(), tokens.TemplateOpen(), - tokens.Text(text="c"), tokens.TemplateParamSeparator(), - tokens.WikilinkOpen(), tokens.Text(text="d"), - tokens.WikilinkClose(), tokens.ArgumentOpen(), - tokens.Text(text="e"), tokens.ArgumentClose(), - tokens.TemplateClose(), tokens.TemplateClose(), - tokens.WikilinkOpen(), tokens.Text(text="f"), - tokens.WikilinkSeparator(), tokens.ArgumentOpen(), - tokens.Text(text="g"), tokens.ArgumentClose(), - tokens.CommentStart(), tokens.Text(text="h"), - tokens.CommentEnd(), tokens.WikilinkClose(), - tokens.TemplateOpen(), tokens.Text(text="i"), - tokens.TemplateParamSeparator(), tokens.Text(text="j"), - tokens.TemplateParamEquals(), tokens.HTMLEntityStart(), - tokens.Text(text="nbsp"), tokens.HTMLEntityEnd(), - tokens.TemplateClose()] + test = [ + tokens.TemplateOpen(), + tokens.Text(text="a"), + tokens.TemplateParamSeparator(), + tokens.Text(text="b"), + tokens.TemplateParamSeparator(), + tokens.TemplateOpen(), + tokens.Text(text="c"), + tokens.TemplateParamSeparator(), + tokens.WikilinkOpen(), + tokens.Text(text="d"), + tokens.WikilinkClose(), + tokens.ArgumentOpen(), + tokens.Text(text="e"), + tokens.ArgumentClose(), + tokens.TemplateClose(), + tokens.TemplateClose(), + tokens.WikilinkOpen(), + tokens.Text(text="f"), + tokens.WikilinkSeparator(), + tokens.ArgumentOpen(), + tokens.Text(text="g"), + tokens.ArgumentClose(), + tokens.CommentStart(), + tokens.Text(text="h"), + tokens.CommentEnd(), + tokens.WikilinkClose(), + tokens.TemplateOpen(), + tokens.Text(text="i"), + tokens.TemplateParamSeparator(), 
+ tokens.Text(text="j"), + tokens.TemplateParamEquals(), + tokens.HTMLEntityStart(), + tokens.Text(text="nbsp"), + tokens.HTMLEntityEnd(), + tokens.TemplateClose(), + ] valid = wrap( - [Template(wraptext("a"), params=[Parameter(wraptext("1"), wraptext( - "b"), showkey=False), Parameter(wraptext("2"), wrap([Template( - wraptext("c"), params=[Parameter(wraptext("1"), wrap([Wikilink( - wraptext("d")), Argument(wraptext("e"))]), showkey=False)])]), - showkey=False)]), Wikilink(wraptext("f"), wrap([Argument(wraptext( - "g")), Comment("h")])), Template(wraptext("i"), params=[ - Parameter(wraptext("j"), wrap([HTMLEntity("nbsp", - named=True)]))])]) + [ + Template( + wraptext("a"), + params=[ + Parameter(wraptext("1"), wraptext("b"), showkey=False), + Parameter( + wraptext("2"), + wrap( + [ + Template( + wraptext("c"), + params=[ + Parameter( + wraptext("1"), + wrap( + [ + Wikilink(wraptext("d")), + Argument(wraptext("e")), + ] + ), + showkey=False, + ) + ], + ) + ] + ), + showkey=False, + ), + ], + ), + Wikilink(wraptext("f"), wrap([Argument(wraptext("g")), Comment("h")])), + Template( + wraptext("i"), + params=[ + Parameter(wraptext("j"), wrap([HTMLEntity("nbsp", named=True)])) + ], + ), + ] + ) assert_wikicode_equal(valid, builder.build(test)) -@pytest.mark.parametrize("tokens", [ - [tokens.TemplateOpen(), tokens.TemplateParamSeparator()], - [tokens.TemplateOpen()], [tokens.ArgumentOpen()], - [tokens.WikilinkOpen()], [tokens.ExternalLinkOpen()], - [tokens.HeadingStart()], [tokens.CommentStart()], - [tokens.TagOpenOpen(), tokens.TagAttrStart()], - [tokens.TagOpenOpen()] -]) + +@pytest.mark.parametrize( + "tokens", + [ + [tokens.TemplateOpen(), tokens.TemplateParamSeparator()], + [tokens.TemplateOpen()], + [tokens.ArgumentOpen()], + [tokens.WikilinkOpen()], + [tokens.ExternalLinkOpen()], + [tokens.HeadingStart()], + [tokens.CommentStart()], + [tokens.TagOpenOpen(), tokens.TagAttrStart()], + [tokens.TagOpenOpen()], + ], +) def test_parser_errors(builder, tokens): """test whether ParserError gets thrown for bad input""" with pytest.raises(ParserError): builder.build(tokens) + def test_parser_errors_templateclose(builder): with pytest.raises( - ParserError, - match=r"_handle_token\(\) got unexpected TemplateClose" + ParserError, match=r"_handle_token\(\) got unexpected TemplateClose" ): builder.build([tokens.TemplateClose()]) diff --git a/tests/test_comment.py b/tests/test_comment.py index bac17a1..3d3c657 100644 --- a/tests/test_comment.py +++ b/tests/test_comment.py @@ -26,11 +26,13 @@ import pytest from mwparserfromhell.nodes import Comment + def test_str(): """test Comment.__str__()""" node = Comment("foobar") assert "" == str(node) + def test_children(): """test Comment.__children__()""" node = Comment("foobar") @@ -38,11 +40,13 @@ def test_children(): with pytest.raises(StopIteration): next(gen) + def test_strip(): """test Comment.__strip__()""" node = Comment("foobar") assert node.__strip__() is None + def test_showtree(): """test Comment.__showtree__()""" output = [] @@ -50,6 +54,7 @@ def test_showtree(): node.__showtree__(output.append, None, None) assert [""] == output + def test_contents(): """test getter/setter for the contents attribute""" node = Comment("foobar") diff --git a/tests/test_docs.py b/tests/test_docs.py index ed77c7e..8ac8f6f 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -32,6 +32,7 @@ import pytest import mwparserfromhell + def assert_print(value, output): """Assertion check that *value*, when printed, produces *output*.""" buff = StringIO() @@ -39,6 +40,7 @@ 
def assert_print(value, output): buff.seek(0) assert output == buff.read() + def test_readme_1(): """test a block of example code in the README""" text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?" @@ -52,6 +54,7 @@ def test_readme_1(): assert_print(template.get(1).value, "bar") assert_print(template.get("eggs").value, "spam") + def test_readme_2(): """test a block of example code in the README""" text = "{{foo|{{bar}}={{baz|{{spam}}}}}}" @@ -59,17 +62,19 @@ def test_readme_2(): res = "['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}']" assert_print(temps, res) + def test_readme_3(): """test a block of example code in the README""" code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}") - assert_print(code.filter_templates(recursive=False), - "['{{foo|this {{includes a|template}}}}']") + assert_print( + code.filter_templates(recursive=False), + "['{{foo|this {{includes a|template}}}}']", + ) foo = code.filter_templates(recursive=False)[0] assert_print(foo.get(1).value, "this {{includes a|template}}") - assert_print(foo.get(1).value.filter_templates()[0], - "{{includes a|template}}") - assert_print(foo.get(1).value.filter_templates()[0].get(1).value, - "template") + assert_print(foo.get(1).value.filter_templates()[0], "{{includes a|template}}") + assert_print(foo.get(1).value.filter_templates()[0].get(1).value, "template") + def test_readme_4(): """test a block of example code in the README""" @@ -90,6 +95,7 @@ def test_readme_4(): assert_print(text, res) assert text == code + @pytest.mark.skipif("NOWEB" in os.environ, reason="web test disabled by environ var") def test_readme_5(): """test a block of example code in the README; includes a web call""" diff --git a/tests/test_external_link.py b/tests/test_external_link.py index 1f9d779..5fda1c8 100644 --- a/tests/test_external_link.py +++ b/tests/test_external_link.py @@ -27,6 +27,7 @@ import pytest from mwparserfromhell.nodes import ExternalLink, Text from .conftest import assert_wikicode_equal, wrap, wraptext + def test_str(): """test ExternalLink.__str__()""" node = ExternalLink(wraptext("http://example.com/"), brackets=False) @@ -35,15 +36,16 @@ def test_str(): assert "[http://example.com/]" == str(node2) node3 = ExternalLink(wraptext("http://example.com/"), wrap([])) assert "[http://example.com/ ]" == str(node3) - node4 = ExternalLink(wraptext("http://example.com/"), - wraptext("Example Web Page")) + node4 = ExternalLink(wraptext("http://example.com/"), wraptext("Example Web Page")) assert "[http://example.com/ Example Web Page]" == str(node4) + def test_children(): """test ExternalLink.__children__()""" node1 = ExternalLink(wraptext("http://example.com/"), brackets=False) - node2 = ExternalLink(wraptext("http://example.com/"), - wrap([Text("Example"), Text("Page")])) + node2 = ExternalLink( + wraptext("http://example.com/"), wrap([Text("Example"), Text("Page")]) + ) gen1 = node1.__children__() gen2 = node2.__children__() assert node1.url == next(gen1) @@ -54,6 +56,7 @@ def test_children(): with pytest.raises(StopIteration): next(gen2) + def test_strip(): """test ExternalLink.__strip__()""" node1 = ExternalLink(wraptext("http://example.com"), brackets=False) @@ -66,6 +69,7 @@ def test_strip(): assert node3.__strip__() is None assert "Link" == node4.__strip__() + def test_showtree(): """test ExternalLink.__showtree__()""" output = [] @@ -76,11 +80,10 @@ def test_showtree(): node2 = ExternalLink(wraptext("http://example.com"), wraptext("Link")) node1.__showtree__(output.append, get, 
mark) node2.__showtree__(output.append, get, mark) - valid = [ - (getter, node1.url), "[", (getter, node2.url), - (getter, node2.title), "]"] + valid = [(getter, node1.url), "[", (getter, node2.url), (getter, node2.title), "]"] assert valid == output + def test_url(): """test getter/setter for the url attribute""" url = wraptext("http://example.com/") @@ -93,6 +96,7 @@ def test_url(): assert_wikicode_equal(wraptext("mailto:héhehé@spam.com"), node1.url) assert_wikicode_equal(wraptext("mailto:héhehé@spam.com"), node2.url) + def test_title(): """test getter/setter for the title attribute""" title = wraptext("Example!") @@ -105,6 +109,7 @@ def test_title(): node2.title = "My Website" assert_wikicode_equal(wraptext("My Website"), node2.title) + def test_brackets(): """test getter/setter for the brackets attribute""" node1 = ExternalLink(wraptext("http://example.com/"), brackets=False) diff --git a/tests/test_heading.py b/tests/test_heading.py index 0eb03ed..dbd01c6 100644 --- a/tests/test_heading.py +++ b/tests/test_heading.py @@ -27,6 +27,7 @@ import pytest from mwparserfromhell.nodes import Heading, Text from .conftest import assert_wikicode_equal, wrap, wraptext + def test_str(): """test Heading.__str__()""" node = Heading(wraptext("foobar"), 2) @@ -34,6 +35,7 @@ def test_str(): node2 = Heading(wraptext(" zzz "), 5) assert "===== zzz =====" == str(node2) + def test_children(): """test Heading.__children__()""" node = Heading(wrap([Text("foo"), Text("bar")]), 3) @@ -42,11 +44,13 @@ def test_children(): with pytest.raises(StopIteration): next(gen) + def test_strip(): """test Heading.__strip__()""" node = Heading(wraptext("foobar"), 3) assert "foobar" == node.__strip__() + def test_showtree(): """test Heading.__showtree__()""" output = [] @@ -56,10 +60,10 @@ def test_showtree(): node2 = Heading(wraptext(" baz "), 4) node1.__showtree__(output.append, get, None) node2.__showtree__(output.append, get, None) - valid = ["===", (getter, node1.title), "===", - "====", (getter, node2.title), "===="] + valid = ["===", (getter, node1.title), "===", "====", (getter, node2.title), "===="] assert valid == output + def test_title(): """test getter/setter for the title attribute""" title = wraptext("foobar") @@ -68,6 +72,7 @@ def test_title(): node.title = "héhehé" assert_wikicode_equal(wraptext("héhehé"), node.title) + def test_level(): """test getter/setter for the level attribute""" node = Heading(wraptext("foobar"), 3) diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py index 624cf71..3739ac4 100644 --- a/tests/test_html_entity.py +++ b/tests/test_html_entity.py @@ -26,6 +26,7 @@ import pytest from mwparserfromhell.nodes import HTMLEntity + def test_str(): """test HTMLEntity.__str__()""" node1 = HTMLEntity("nbsp", named=True, hexadecimal=False) @@ -37,6 +38,7 @@ def test_str(): assert "k" == str(node3) assert "l" == str(node4) + def test_children(): """test HTMLEntity.__children__()""" node = HTMLEntity("nbsp", named=True, hexadecimal=False) @@ -44,6 +46,7 @@ def test_children(): with pytest.raises(StopIteration): next(gen) + def test_strip(): """test HTMLEntity.__strip__()""" node1 = HTMLEntity("nbsp", named=True, hexadecimal=False) @@ -57,6 +60,7 @@ def test_strip(): assert "é" == node3.__strip__(normalize=True) assert "é" == node3.__strip__(normalize=False) + def test_showtree(): """test HTMLEntity.__showtree__()""" output = [] @@ -69,6 +73,7 @@ def test_showtree(): res = [" ", "k", "é"] assert res == output + def test_value(): """test getter/setter for the value attribute""" node1 = 
HTMLEntity("nbsp") @@ -109,6 +114,7 @@ def test_value(): with pytest.raises(ValueError): node1.__setattr__("value", "12FFFF") + def test_named(): """test getter/setter for the named attribute""" node1 = HTMLEntity("nbsp") @@ -130,6 +136,7 @@ def test_named(): with pytest.raises(ValueError): node3.__setattr__("named", True) + def test_hexadecimal(): """test getter/setter for the hexadecimal attribute""" node1 = HTMLEntity("nbsp") @@ -147,6 +154,7 @@ def test_hexadecimal(): with pytest.raises(ValueError): node1.__setattr__("hexadecimal", True) + def test_hex_char(): """test getter/setter for the hex_char attribute""" node1 = HTMLEntity("e9") @@ -164,6 +172,7 @@ def test_hex_char(): with pytest.raises(ValueError): node1.__setattr__("hex_char", True) + def test_normalize(): """test getter/setter for the normalize attribute""" node1 = HTMLEntity("nbsp") diff --git a/tests/test_parameter.py b/tests/test_parameter.py index 92b3c25..68d8519 100644 --- a/tests/test_parameter.py +++ b/tests/test_parameter.py @@ -27,6 +27,7 @@ import pytest from mwparserfromhell.nodes.extras import Parameter from .conftest import assert_wikicode_equal, wraptext + def test_str(): """test Parameter.__str__()""" node = Parameter(wraptext("1"), wraptext("foo"), showkey=False) @@ -34,6 +35,7 @@ def test_str(): node2 = Parameter(wraptext("foo"), wraptext("bar")) assert "foo=bar" == str(node2) + def test_name(): """test getter/setter for the name attribute""" name1 = wraptext("1") @@ -47,6 +49,7 @@ def test_name(): assert_wikicode_equal(wraptext("héhehé"), node1.name) assert_wikicode_equal(wraptext("héhehé"), node2.name) + def test_value(): """test getter/setter for the value attribute""" value = wraptext("bar") @@ -55,6 +58,7 @@ def test_value(): node.value = "héhehé" assert_wikicode_equal(wraptext("héhehé"), node.value) + def test_showkey(): """test getter/setter for the showkey attribute""" node1 = Parameter(wraptext("1"), wraptext("foo"), showkey=False) diff --git a/tests/test_parser.py b/tests/test_parser.py index fe479f6..8ee7b53 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -29,6 +29,7 @@ from mwparserfromhell.nodes import Tag, Template, Text, Wikilink from mwparserfromhell.nodes.extras import Parameter from .conftest import assert_wikicode_equal, wrap, wraptext + @pytest.fixture() def pyparser(): """make sure the correct tokenizer is used""" @@ -38,37 +39,60 @@ def pyparser(): yield parser.use_c = restore + def test_use_c(pyparser): assert parser.Parser()._tokenizer.USES_C is False + def test_parsing(pyparser): """integration test for parsing overall""" text = "this is text; {{this|is=a|template={{with|[[links]]|in}}it}}" - expected = wrap([ - Text("this is text; "), - Template(wraptext("this"), [ - Parameter(wraptext("is"), wraptext("a")), - Parameter(wraptext("template"), wrap([ - Template(wraptext("with"), [ - Parameter(wraptext("1"), - wrap([Wikilink(wraptext("links"))]), - showkey=False), - Parameter(wraptext("2"), - wraptext("in"), showkey=False) - ]), - Text("it") - ])) - ]) - ]) + expected = wrap( + [ + Text("this is text; "), + Template( + wraptext("this"), + [ + Parameter(wraptext("is"), wraptext("a")), + Parameter( + wraptext("template"), + wrap( + [ + Template( + wraptext("with"), + [ + Parameter( + wraptext("1"), + wrap([Wikilink(wraptext("links"))]), + showkey=False, + ), + Parameter( + wraptext("2"), wraptext("in"), showkey=False + ), + ], + ), + Text("it"), + ] + ), + ), + ], + ), + ] + ) actual = parser.Parser().parse(text) assert_wikicode_equal(expected, actual) + def 
test_skip_style_tags(pyparser): """test Parser.parse(skip_style_tags=True)""" text = "This is an example with ''italics''!" - a = wrap([Text("This is an example with "), - Tag(wraptext("i"), wraptext("italics"), wiki_markup="''"), - Text("!")]) + a = wrap( + [ + Text("This is an example with "), + Tag(wraptext("i"), wraptext("italics"), wiki_markup="''"), + Text("!"), + ] + ) b = wraptext("This is an example with ''italics''!") with_style = parser.Parser().parse(text, skip_style_tags=False) diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 8766974..54ac00c 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -27,6 +27,7 @@ import pytest from mwparserfromhell.smart_list import SmartList from mwparserfromhell.smart_list.list_proxy import ListProxy + def _test_get_set_del_item(builder): """Run tests on __get/set/delitem__ of a list built with *builder*.""" list1 = builder([0, 1, 2, 3, "one", "two"]) @@ -104,6 +105,7 @@ def _test_get_set_del_item(builder): del list2[2:8:2] assert [0, 1, 3, 5, 7, 8, 9] == list2 + def _test_add_radd_iadd(builder): """Run tests on __r/i/add__ of a list built with *builder*.""" list1 = builder(range(5)) @@ -116,6 +118,7 @@ def _test_add_radd_iadd(builder): list1 += ["foo", "bar", "baz"] assert [0, 1, 2, 3, 4, "foo", "bar", "baz"] == list1 + def _test_other_magic_methods(builder): """Run tests on other magic methods of a list built with *builder*.""" list1 = builder([0, 1, 2, 3, "one", "two"]) @@ -200,6 +203,7 @@ def _test_other_magic_methods(builder): list4 *= 2 assert [0, 1, 2, 0, 1, 2] == list4 + def _test_list_methods(builder): """Run tests on the public methods of a list built with *builder*.""" list1 = builder(range(5)) @@ -263,6 +267,7 @@ def _test_list_methods(builder): list3.sort(key=lambda i: i[1], reverse=True) assert [("b", 8), ("a", 5), ("c", 3), ("d", 2)] == list3 + def _dispatch_test_for_children(meth): """Run a test method on various different types of children.""" meth(lambda L: SmartList(list(L))[:]) @@ -270,10 +275,20 @@ def _dispatch_test_for_children(meth): meth(lambda L: SmartList(list(L) + [999])[:-1]) meth(lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2]) + def test_docs(): """make sure the methods of SmartList/ListProxy have docstrings""" - methods = ["append", "count", "extend", "index", "insert", "pop", - "remove", "reverse", "sort"] + methods = [ + "append", + "count", + "extend", + "index", + "insert", + "pop", + "remove", + "reverse", + "sort", + ] for meth in methods: expected = getattr(list, meth).__doc__ smartlist_doc = getattr(SmartList, meth).__doc__ @@ -281,6 +296,7 @@ def test_docs(): assert expected == smartlist_doc assert expected == listproxy_doc + def test_doctest(): """make sure the test embedded in SmartList's docstring passes""" parent = SmartList([0, 1, 2, 3]) @@ -291,38 +307,47 @@ def test_doctest(): assert [2, 3, 4] == child assert [0, 1, 2, 3, 4] == parent + def test_parent_get_set_del(): """make sure SmartList's getitem/setitem/delitem work""" _test_get_set_del_item(SmartList) + def test_parent_add(): """make sure SmartList's add/radd/iadd work""" _test_add_radd_iadd(SmartList) + def test_parent_other_magics(): """make sure SmartList's other magically implemented features work""" _test_other_magic_methods(SmartList) + def test_parent_methods(): """make sure SmartList's non-magic methods work, like append()""" _test_list_methods(SmartList) + def test_child_get_set_del(): """make sure ListProxy's getitem/setitem/delitem work""" 
_dispatch_test_for_children(_test_get_set_del_item) + def test_child_add(): """make sure ListProxy's add/radd/iadd work""" _dispatch_test_for_children(_test_add_radd_iadd) + def test_child_other_magics(): """make sure ListProxy's other magically implemented features work""" _dispatch_test_for_children(_test_other_magic_methods) + def test_child_methods(): """make sure ListProxy's non-magic methods work, like append()""" _dispatch_test_for_children(_test_list_methods) + def test_influence(): """make sure changes are propagated from parents to children""" parent = SmartList([0, 1, 2, 3, 4, 5]) diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 552b2ef..f362b45 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -29,6 +29,7 @@ import pytest from mwparserfromhell.string_mixin import StringMixIn + class _FakeString(StringMixIn): def __init__(self, data): self._data = data @@ -36,22 +37,63 @@ class _FakeString(StringMixIn): def __str__(self): return self._data -@pytest.mark.parametrize('method', [ - "capitalize", "casefold", "center", "count", "encode", "endswith", - "expandtabs", "find", "format", "format_map", "index", "isalnum", - "isalpha", "isdecimal", "isdigit", "isidentifier", "islower", - "isnumeric", "isprintable", "isspace", "istitle", "isupper", - "join", "ljust", "lower", "lstrip", "maketrans", "partition", - "replace", "rfind", "rindex", "rjust", "rpartition", "rsplit", - "rstrip", "split", "splitlines", "startswith", "strip", "swapcase", - "title", "translate", "upper", "zfill" -]) + +@pytest.mark.parametrize( + "method", + [ + "capitalize", + "casefold", + "center", + "count", + "encode", + "endswith", + "expandtabs", + "find", + "format", + "format_map", + "index", + "isalnum", + "isalpha", + "isdecimal", + "isdigit", + "isidentifier", + "islower", + "isnumeric", + "isprintable", + "isspace", + "istitle", + "isupper", + "join", + "ljust", + "lower", + "lstrip", + "maketrans", + "partition", + "replace", + "rfind", + "rindex", + "rjust", + "rpartition", + "rsplit", + "rstrip", + "split", + "splitlines", + "startswith", + "strip", + "swapcase", + "title", + "translate", + "upper", + "zfill", + ], +) def test_docs(method): """make sure the various methods of StringMixIn have docstrings""" expected = getattr("foo", method).__doc__ actual = getattr(_FakeString("foo"), method).__doc__ assert expected == actual + def test_types(): """make sure StringMixIns convert to different types correctly""" fstr = _FakeString("fake string") @@ -63,6 +105,7 @@ def test_types(): assert isinstance(bytes(fstr), bytes) assert isinstance(repr(fstr), str) + def test_comparisons(): """make sure comparison operators work""" str1 = _FakeString("this is a fake string") @@ -99,6 +142,7 @@ def test_comparisons(): assert str5 < str1 assert str5 <= str1 + def test_other_magics(): """test other magically implemented features, like len() and iter()""" str1 = _FakeString("fake string") @@ -154,6 +198,7 @@ def test_other_magics(): assert "real" not in str1 assert "s" not in str2 + def test_other_methods(): """test the remaining non-magic methods of StringMixIn""" str1 = _FakeString("fake string") @@ -354,8 +399,21 @@ def test_other_methods(): actual = ["this", "is", "a", "sentence", "with", "whitespace"] assert actual == str25.rsplit() assert actual == str25.rsplit(None) - actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with", - "", "whitespace", ""] + actual = [ + "", + "", + "", + "this", + "is", + "a", + "", + "", + "sentence", + "with", + "", + 
"whitespace", + "", + ] assert actual == str25.rsplit(" ") actual = [" this is a", "sentence", "with", "whitespace"] assert actual == str25.rsplit(None, 3) @@ -371,8 +429,21 @@ def test_other_methods(): actual = ["this", "is", "a", "sentence", "with", "whitespace"] assert actual == str25.split() assert actual == str25.split(None) - actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with", - "", "whitespace", ""] + actual = [ + "", + "", + "", + "this", + "is", + "a", + "", + "", + "sentence", + "with", + "", + "whitespace", + "", + ] assert actual == str25.split(" ") actual = ["this", "is", "a", "sentence with whitespace "] assert actual == str25.split(None, 3) @@ -382,10 +453,15 @@ def test_other_methods(): assert actual == str25.split(maxsplit=3) str26 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere") - assert ["lines", "of", "text", "are", "presented", "here"] \ - == str26.splitlines() - assert ["lines\n", "of\n", "text\r\n", "are\r\n", "presented\n", "here"] \ - == str26.splitlines(True) + assert ["lines", "of", "text", "are", "presented", "here"] == str26.splitlines() + assert [ + "lines\n", + "of\n", + "text\r\n", + "are\r\n", + "presented\n", + "here", + ] == str26.splitlines(True) assert str1.startswith("fake") is True assert str1.startswith("faker") is False @@ -398,8 +474,7 @@ def test_other_methods(): assert "Fake String" == str1.title() - table1 = StringMixIn.maketrans({97: "1", 101: "2", 105: "3", - 111: "4", 117: "5"}) + table1 = StringMixIn.maketrans({97: "1", 101: "2", 105: "3", 111: "4", 117: "5"}) table2 = StringMixIn.maketrans("aeiou", "12345") table3 = StringMixIn.maketrans("aeiou", "12345", "rts") assert "f1k2 str3ng" == str1.translate(table1) diff --git a/tests/test_tag.py b/tests/test_tag.py index c5549d0..1297e2f 100644 --- a/tests/test_tag.py +++ b/tests/test_tag.py @@ -34,21 +34,20 @@ agennq = lambda name, value: Attribute(wraptext(name), wraptext(value), None) agenp = lambda name, v, a, b, c: Attribute(wraptext(name), v, '"', a, b, c) agenpnv = lambda name, a, b, c: Attribute(wraptext(name), None, '"', a, b, c) + def test_str(): """test Tag.__str__()""" node1 = Tag(wraptext("ref")) - node2 = Tag(wraptext("span"), wraptext("foo"), - [agen("style", "color: red;")]) - node3 = Tag(wraptext("ref"), - attrs=[agennq("name", "foo"), - agenpnv("some_attr", " ", "", "")], - self_closing=True) + node2 = Tag(wraptext("span"), wraptext("foo"), [agen("style", "color: red;")]) + node3 = Tag( + wraptext("ref"), + attrs=[agennq("name", "foo"), agenpnv("some_attr", " ", "", "")], + self_closing=True, + ) node4 = Tag(wraptext("br"), self_closing=True, padding=" ") node5 = Tag(wraptext("br"), self_closing=True, implicit=True) - node6 = Tag(wraptext("br"), self_closing=True, invalid=True, - implicit=True) - node7 = Tag(wraptext("br"), self_closing=True, invalid=True, - padding=" ") + node6 = Tag(wraptext("br"), self_closing=True, invalid=True, implicit=True) + node7 = Tag(wraptext("br"), self_closing=True, invalid=True, padding=" ") node8 = Tag(wraptext("hr"), wiki_markup="----", self_closing=True) node9 = Tag(wraptext("i"), wraptext("italics!"), wiki_markup="''") @@ -62,6 +61,7 @@ def test_str(): assert "----" == str(node8) assert "''italics!''" == str(node9) + def test_children(): """test Tag.__children__()""" # foobar @@ -69,10 +69,12 @@ def test_children(): # '''bold text''' node2 = Tag(wraptext("b"), wraptext("bold text"), wiki_markup="'''") # - node3 = Tag(wraptext("img"), - attrs=[agen("id", "foo"), agen("class", "bar"), - agennv("selected")], - 
self_closing=True, padding=" ") + node3 = Tag( + wraptext("img"), + attrs=[agen("id", "foo"), agen("class", "bar"), agennv("selected")], + self_closing=True, + padding=" ", + ) gen1 = node1.__children__() gen2 = node2.__children__() @@ -94,6 +96,7 @@ def test_children(): with pytest.raises(StopIteration): next(gen3) + def test_strip(): """test Tag.__strip__()""" node1 = Tag(wraptext("i"), wraptext("foobar")) @@ -104,28 +107,46 @@ def test_strip(): assert node2.__strip__() is None assert node3.__strip__() is None + def test_showtree(): """test Tag.__showtree__()""" output = [] getter, marker = object(), object() get = lambda code: output.append((getter, code)) mark = lambda: output.append(marker) - node1 = Tag(wraptext("ref"), wraptext("text"), - [agen("name", "foo"), agennv("selected")]) + node1 = Tag( + wraptext("ref"), wraptext("text"), [agen("name", "foo"), agennv("selected")] + ) node2 = Tag(wraptext("br"), self_closing=True, padding=" ") - node3 = Tag(wraptext("br"), self_closing=True, invalid=True, - implicit=True, padding=" ") + node3 = Tag( + wraptext("br"), self_closing=True, invalid=True, implicit=True, padding=" " + ) node1.__showtree__(output.append, get, mark) node2.__showtree__(output.append, get, mark) node3.__showtree__(output.append, get, mark) valid = [ - "<", (getter, node1.tag), (getter, node1.attributes[0].name), - " = ", marker, (getter, node1.attributes[0].value), - (getter, node1.attributes[1].name), ">", (getter, node1.contents), - "", "<", (getter, node2.tag), - "/>", ""] + "<", + (getter, node1.tag), + (getter, node1.attributes[0].name), + " = ", + marker, + (getter, node1.attributes[0].value), + (getter, node1.attributes[1].name), + ">", + (getter, node1.contents), + "", + "<", + (getter, node2.tag), + "/>", + "", + ] assert valid == output + def test_tag(): """test getter/setter for the tag attribute""" tag = wraptext("ref") @@ -137,6 +158,7 @@ def test_tag(): assert_wikicode_equal(wraptext("span"), node.closing_tag) assert "text" == node + def test_contents(): """test getter/setter for the contents attribute""" contents = wraptext("text") @@ -147,6 +169,7 @@ def test_contents(): assert_wikicode_equal(parsed, node.contents) assert "text and a {{template}}" == node + def test_attributes(): """test getter for the attributes attribute""" attrs = [agen("name", "bar")] @@ -155,6 +178,7 @@ def test_attributes(): assert [] == node1.attributes assert attrs is node2.attributes + def test_wiki_markup(): """test getter/setter for the wiki_markup attribute""" node = Tag(wraptext("i"), wraptext("italic text")) @@ -166,6 +190,7 @@ def test_wiki_markup(): assert node.wiki_markup is None assert "italic text" == node + def test_self_closing(): """test getter/setter for the self_closing attribute""" node = Tag(wraptext("ref"), wraptext("foobar")) @@ -177,6 +202,7 @@ def test_self_closing(): assert node.self_closing is False assert "foobar" == node + def test_invalid(): """test getter/setter for the invalid attribute""" node = Tag(wraptext("br"), self_closing=True, implicit=True) @@ -188,6 +214,7 @@ def test_invalid(): assert node.invalid is False assert "
    " == node + def test_implicit(): """test getter/setter for the implicit attribute""" node = Tag(wraptext("br"), self_closing=True) @@ -199,6 +226,7 @@ def test_implicit(): assert node.implicit is False assert "
    " == node + def test_padding(): """test getter/setter for the padding attribute""" node = Tag(wraptext("ref"), wraptext("foobar")) @@ -212,6 +240,7 @@ def test_padding(): with pytest.raises(ValueError): node.__setattr__("padding", True) + def test_closing_tag(): """test getter/setter for the closing_tag attribute""" tag = wraptext("ref") @@ -222,6 +251,7 @@ def test_closing_tag(): assert_wikicode_equal(parsed, node.closing_tag) assert "foobar" == node + def test_wiki_style_separator(): """test getter/setter for wiki_style_separator attribute""" node = Tag(wraptext("table"), wraptext("\n")) @@ -233,6 +263,7 @@ def test_wiki_style_separator(): node2 = Tag(wraptext("table"), wraptext("\n"), wiki_style_separator="|") assert "|" == node2.wiki_style_separator + def test_closing_wiki_markup(): """test getter/setter for closing_wiki_markup attribute""" node = Tag(wraptext("table"), wraptext("\n")) @@ -248,12 +279,17 @@ def test_closing_wiki_markup(): node.wiki_markup = False assert node.closing_wiki_markup is None assert "\n
    " == node - node2 = Tag(wraptext("table"), wraptext("\n"), - attrs=[agen("id", "foo")], wiki_markup="{|", - closing_wiki_markup="|}") + node2 = Tag( + wraptext("table"), + wraptext("\n"), + attrs=[agen("id", "foo")], + wiki_markup="{|", + closing_wiki_markup="|}", + ) assert "|}" == node2.closing_wiki_markup assert '{| id="foo"\n|}' == node2 + def test_has(): """test Tag.has()""" node = Tag(wraptext("ref"), wraptext("cite"), [agen("name", "foo")]) @@ -263,19 +299,26 @@ def test_has(): assert node.has("Name") is False assert node.has("foo") is False - attrs = [agen("id", "foo"), agenp("class", "bar", " ", "\n", "\n"), - agen("foo", "bar"), agenpnv("foo", " ", " \n ", " \t")] + attrs = [ + agen("id", "foo"), + agenp("class", "bar", " ", "\n", "\n"), + agen("foo", "bar"), + agenpnv("foo", " ", " \n ", " \t"), + ] node2 = Tag(wraptext("div"), attrs=attrs, self_closing=True) assert node2.has("id") is True assert node2.has("class") is True - assert node2.has(attrs[1].pad_first + str(attrs[1].name) + - attrs[1].pad_before_eq) is True + assert ( + node2.has(attrs[1].pad_first + str(attrs[1].name) + attrs[1].pad_before_eq) + is True + ) assert node2.has(attrs[3]) is True assert node2.has(str(attrs[3])) is True assert node2.has("idclass") is False assert node2.has("id class") is False assert node2.has("id=foo") is False + def test_get(): """test Tag.get()""" attrs = [agen("name", "foo")] @@ -288,13 +331,18 @@ def test_get(): with pytest.raises(ValueError): node.get("foo") - attrs = [agen("id", "foo"), agenp("class", "bar", " ", "\n", "\n"), - agen("foo", "bar"), agenpnv("foo", " ", " \n ", " \t")] + attrs = [ + agen("id", "foo"), + agenp("class", "bar", " ", "\n", "\n"), + agen("foo", "bar"), + agenpnv("foo", " ", " \n ", " \t"), + ] node2 = Tag(wraptext("div"), attrs=attrs, self_closing=True) assert attrs[0] is node2.get("id") assert attrs[1] is node2.get("class") assert attrs[1] is node2.get( - attrs[1].pad_first + str(attrs[1].name) + attrs[1].pad_before_eq) + attrs[1].pad_first + str(attrs[1].name) + attrs[1].pad_before_eq + ) assert attrs[3] is node2.get(attrs[3]) assert attrs[3] is node2.get(str(attrs[3])) assert attrs[3] is node2.get(" foo") @@ -305,6 +353,7 @@ def test_get(): with pytest.raises(ValueError): node2.get("id=foo") + def test_add(): """test Tag.add()""" node = Tag(wraptext("ref"), wraptext("cite")) @@ -330,19 +379,29 @@ def test_add(): assert attr6 == node.attributes[5] assert attr7 == node.attributes[6] assert attr7 == node.get("name") - assert_wikicode_equal(wrap([Template(wraptext("foobar"))]), - node.attributes[5].value) - assert "".join(("cite")) == node + assert_wikicode_equal( + wrap([Template(wraptext("foobar"))]), node.attributes[5].value + ) + assert ( + "".join( + ("cite") + ) + == node + ) with pytest.raises(ValueError): node.add("name", "foo", quotes="bar") with pytest.raises(ValueError): node.add("name", "a bc d", quotes=None) + def test_remove(): """test Tag.remove()""" - attrs = [agen("id", "foo"), agenp("class", "bar", " ", "\n", "\n"), - agen("foo", "bar"), agenpnv("foo", " ", " \n ", " \t")] + attrs = [ + agen("id", "foo"), + agenp("class", "bar", " ", "\n", "\n"), + agen("foo", "bar"), + agenpnv("foo", " ", " \n ", " \t"), + ] node = Tag(wraptext("div"), attrs=attrs, self_closing=True) node.remove("class") assert '
    ' == node @@ -351,4 +410,4 @@ def test_remove(): with pytest.raises(ValueError): node.remove("foo") node.remove("id") - assert '
<div/>' == node
+    assert "<div/>
    " == node diff --git a/tests/test_template.py b/tests/test_template.py index 66c02c9..f0154a4 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -34,19 +34,19 @@ from .conftest import assert_wikicode_equal, wrap, wraptext pgens = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=True) pgenh = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=False) + def test_str(): """test Template.__str__()""" node = Template(wraptext("foobar")) assert "{{foobar}}" == str(node) - node2 = Template(wraptext("foo"), - [pgenh("1", "bar"), pgens("abc", "def")]) + node2 = Template(wraptext("foo"), [pgenh("1", "bar"), pgens("abc", "def")]) assert "{{foo|bar|abc=def}}" == str(node2) + def test_children(): """test Template.__children__()""" node2p1 = Parameter(wraptext("1"), wraptext("bar"), showkey=False) - node2p2 = Parameter(wraptext("abc"), wrap([Text("def"), Text("ghi")]), - showkey=True) + node2p2 = Parameter(wraptext("abc"), wrap([Text("def"), Text("ghi")]), showkey=True) node1 = Template(wraptext("foobar")) node2 = Template(wraptext("foo"), [node2p1, node2p2]) @@ -62,16 +62,23 @@ def test_children(): with pytest.raises(StopIteration): next(gen2) + def test_strip(): """test Template.__strip__()""" node1 = Template(wraptext("foobar")) - node2 = Template(wraptext("foo"), [ - pgenh("1", "bar"), pgens("foo", ""), pgens("abc", "def")]) - node3 = Template(wraptext("foo"), [ - pgenh("1", "foo"), - Parameter(wraptext("2"), wrap([Template(wraptext("hello"))]), - showkey=False), - pgenh("3", "bar")]) + node2 = Template( + wraptext("foo"), [pgenh("1", "bar"), pgens("foo", ""), pgens("abc", "def")] + ) + node3 = Template( + wraptext("foo"), + [ + pgenh("1", "foo"), + Parameter( + wraptext("2"), wrap([Template(wraptext("hello"))]), showkey=False + ), + pgenh("3", "bar"), + ], + ) assert node1.__strip__(keep_template_params=False) is None assert node2.__strip__(keep_template_params=False) is None @@ -79,6 +86,7 @@ def test_strip(): assert "bar def" == node2.__strip__(keep_template_params=True) assert "foo bar" == node3.__strip__(keep_template_params=True) + def test_showtree(): """test Template.__showtree__()""" output = [] @@ -86,18 +94,32 @@ def test_showtree(): get = lambda code: output.append((getter, code)) mark = lambda: output.append(marker) node1 = Template(wraptext("foobar")) - node2 = Template(wraptext("foo"), - [pgenh("1", "bar"), pgens("abc", "def")]) + node2 = Template(wraptext("foo"), [pgenh("1", "bar"), pgens("abc", "def")]) node1.__showtree__(output.append, get, mark) node2.__showtree__(output.append, get, mark) valid = [ - "{{", (getter, node1.name), "}}", "{{", (getter, node2.name), - " | ", marker, (getter, node2.params[0].name), " = ", marker, - (getter, node2.params[0].value), " | ", marker, - (getter, node2.params[1].name), " = ", marker, - (getter, node2.params[1].value), "}}"] + "{{", + (getter, node1.name), + "}}", + "{{", + (getter, node2.name), + " | ", + marker, + (getter, node2.params[0].name), + " = ", + marker, + (getter, node2.params[0].value), + " | ", + marker, + (getter, node2.params[1].name), + " = ", + marker, + (getter, node2.params[1].value), + "}}", + ] assert valid == output + def test_name(): """test getter/setter for the name attribute""" name = wraptext("foobar") @@ -110,6 +132,7 @@ def test_name(): assert_wikicode_equal(wraptext("asdf"), node1.name) assert_wikicode_equal(wraptext("téstïng"), node2.name) + def test_params(): """test getter for the params attribute""" node1 = Template(wraptext("foobar")) @@ -118,13 +141,14 @@ def 
test_params(): assert [] == node1.params assert plist is node2.params + def test_has(): """test Template.has()""" node1 = Template(wraptext("foobar")) - node2 = Template(wraptext("foo"), - [pgenh("1", "bar"), pgens("\nabc ", "def")]) - node3 = Template(wraptext("foo"), - [pgenh("1", "a"), pgens("b", "c"), pgens("1", "d")]) + node2 = Template(wraptext("foo"), [pgenh("1", "bar"), pgens("\nabc ", "def")]) + node3 = Template( + wraptext("foo"), [pgenh("1", "a"), pgens("b", "c"), pgens("1", "d")] + ) node4 = Template(wraptext("foo"), [pgenh("1", "a"), pgens("b", " ")]) assert node1.has("foobar", False) is False assert node2.has(1, False) is True @@ -138,6 +162,7 @@ def test_has(): assert node1.has_param("foobar", False) is False assert node2.has_param(1, False) is True + def test_get(): """test Template.get()""" node1 = Template(wraptext("foobar")) @@ -159,16 +184,15 @@ def test_get(): assert node3p2 is node3.get("1") assert node4p1 is node4.get("b ") + def test_add(): """test Template.add()""" node1 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) node2 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) node3 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) node4 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) - node5 = Template(wraptext("a"), [pgens("b", "c"), - pgens(" d ", "e")]) - node6 = Template(wraptext("a"), [pgens("b", "c"), pgens("b", "d"), - pgens("b", "e")]) + node5 = Template(wraptext("a"), [pgens("b", "c"), pgens(" d ", "e")]) + node6 = Template(wraptext("a"), [pgens("b", "c"), pgens("b", "d"), pgens("b", "e")]) node7 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) node8p = pgenh("1", "d") node8 = Template(wraptext("a"), [pgens("b", "c"), node8p]) @@ -176,48 +200,87 @@ def test_add(): node10 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "e")]) node11 = Template(wraptext("a"), [pgens("b", "c")]) node12 = Template(wraptext("a"), [pgens("b", "c")]) - node13 = Template(wraptext("a"), [ - pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) - node14 = Template(wraptext("a\n"), [ - pgens("b ", "c\n"), pgens("d ", " e"), pgens("f ", "g\n"), - pgens("h ", " i\n")]) - node15 = Template(wraptext("a"), [ - pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")]) - node16 = Template(wraptext("a"), [ - pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) + node13 = Template( + wraptext("a"), [pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")] + ) + node14 = Template( + wraptext("a\n"), + [ + pgens("b ", "c\n"), + pgens("d ", " e"), + pgens("f ", "g\n"), + pgens("h ", " i\n"), + ], + ) + node15 = Template( + wraptext("a"), + [pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")], + ) + node16 = Template( + wraptext("a"), [pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")] + ) node17 = Template(wraptext("a"), [pgenh("1", "b")]) node18 = Template(wraptext("a"), [pgenh("1", "b")]) node19 = Template(wraptext("a"), [pgenh("1", "b")]) - node20 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"), - pgenh("3", "d"), pgenh("4", "e")]) - node21 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"), - pgens("4", "d"), pgens("5", "e")]) - node22 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"), - pgens("4", "d"), pgens("5", "e")]) + node20 = Template( + wraptext("a"), + [pgenh("1", "b"), pgenh("2", "c"), pgenh("3", "d"), pgenh("4", "e")], + ) + node21 = Template( + wraptext("a"), + [pgenh("1", "b"), pgenh("2", "c"), pgens("4", "d"), pgens("5", 
"e")], + ) + node22 = Template( + wraptext("a"), + [pgenh("1", "b"), pgenh("2", "c"), pgens("4", "d"), pgens("5", "e")], + ) node23 = Template(wraptext("a"), [pgenh("1", "b")]) node24 = Template(wraptext("a"), [pgenh("1", "b")]) node25 = Template(wraptext("a"), [pgens("b", "c")]) node26 = Template(wraptext("a"), [pgenh("1", "b")]) node27 = Template(wraptext("a"), [pgenh("1", "b")]) node28 = Template(wraptext("a"), [pgens("1", "b")]) - node29 = Template(wraptext("a"), [ - pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) - node30 = Template(wraptext("a\n"), [ - pgens("b ", "c\n"), pgens("d ", " e"), pgens("f ", "g\n"), - pgens("h ", " i\n")]) - node31 = Template(wraptext("a"), [ - pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")]) - node32 = Template(wraptext("a"), [ - pgens("\nb ", " c "), pgens("\nd ", " e "), pgens("\nf ", " g ")]) - node33 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"), - pgens("b", "f"), pgens("b", "h"), - pgens("i", "j")]) - node34 = Template(wraptext("a"), [pgens("1", "b"), pgens("x", "y"), - pgens("1", "c"), pgens("2", "d")]) - node35 = Template(wraptext("a"), [pgens("1", "b"), pgens("x", "y"), - pgenh("1", "c"), pgenh("2", "d")]) - node36 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"), - pgens("f", "g")]) + node29 = Template( + wraptext("a"), [pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")] + ) + node30 = Template( + wraptext("a\n"), + [ + pgens("b ", "c\n"), + pgens("d ", " e"), + pgens("f ", "g\n"), + pgens("h ", " i\n"), + ], + ) + node31 = Template( + wraptext("a"), + [pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")], + ) + node32 = Template( + wraptext("a"), + [pgens("\nb ", " c "), pgens("\nd ", " e "), pgens("\nf ", " g ")], + ) + node33 = Template( + wraptext("a"), + [ + pgens("b", "c"), + pgens("d", "e"), + pgens("b", "f"), + pgens("b", "h"), + pgens("i", "j"), + ], + ) + node34 = Template( + wraptext("a"), + [pgens("1", "b"), pgens("x", "y"), pgens("1", "c"), pgens("2", "d")], + ) + node35 = Template( + wraptext("a"), + [pgens("1", "b"), pgens("x", "y"), pgenh("1", "c"), pgenh("2", "d")], + ) + node36 = Template( + wraptext("a"), [pgens("b", "c"), pgens("d", "e"), pgens("f", "g")] + ) node37 = Template(wraptext("a"), [pgenh("1", "")]) node38 = Template(wraptext("abc")) node39 = Template(wraptext("a"), [pgenh("1", " b ")]) @@ -320,65 +383,121 @@ def test_add(): assert "{{a|1= b|2= c|3= d}}" == node41 assert "{{a|b=hello \n}}" == node42 + def test_remove(): """test Template.remove()""" node1 = Template(wraptext("foobar")) - node2 = Template(wraptext("foo"), - [pgenh("1", "bar"), pgens("abc", "def")]) - node3 = Template(wraptext("foo"), - [pgenh("1", "bar"), pgens("abc", "def")]) - node4 = Template(wraptext("foo"), - [pgenh("1", "bar"), pgenh("2", "baz")]) - node5 = Template(wraptext("foo"), [ - pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]) - node6 = Template(wraptext("foo"), [ - pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]) - node7 = Template(wraptext("foo"), [ - pgens("1 ", "a"), pgens(" 1", "b"), pgens("2", "c")]) - node8 = Template(wraptext("foo"), [ - pgens("1 ", "a"), pgens(" 1", "b"), pgens("2", "c")]) - node9 = Template(wraptext("foo"), [ - pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")]) - node10 = Template(wraptext("foo"), [ - pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")]) - node11 = Template(wraptext("foo"), [ - pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]) - node12 = Template(wraptext("foo"), [ - pgens(" a", "b"), pgens("b", "c"), 
pgens("a ", "d")]) - node13 = Template(wraptext("foo"), [ - pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]) - node14 = Template(wraptext("foo"), [ - pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]) - node15 = Template(wraptext("foo"), [ - pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]) - node16 = Template(wraptext("foo"), [ - pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]) - node17 = Template(wraptext("foo"), [ - pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")]) - node18 = Template(wraptext("foo"), [ - pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")]) - node19 = Template(wraptext("foo"), [ - pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")]) - node20 = Template(wraptext("foo"), [ - pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")]) - node21 = Template(wraptext("foo"), [ - pgens("a", "b"), pgens("c", "d"), pgens("e", "f"), pgens("a", "b"), - pgens("a", "b")]) - node22 = Template(wraptext("foo"), [ - pgens("a", "b"), pgens("c", "d"), pgens("e", "f"), pgens("a", "b"), - pgens("a", "b")]) - node23 = Template(wraptext("foo"), [ - pgens("a", "b"), pgens("c", "d"), pgens("e", "f"), pgens("a", "b"), - pgens("a", "b")]) - node24 = Template(wraptext("foo"), [ - pgens("a", "b"), pgens("c", "d"), pgens("e", "f"), pgens("a", "b"), - pgens("a", "b")]) - node25 = Template(wraptext("foo"), [ - pgens("a", "b"), pgens("c", "d"), pgens("e", "f"), pgens("a", "b"), - pgens("a", "b")]) - node26 = Template(wraptext("foo"), [ - pgens("a", "b"), pgens("c", "d"), pgens("e", "f"), pgens("a", "b"), - pgens("a", "b")]) + node2 = Template(wraptext("foo"), [pgenh("1", "bar"), pgens("abc", "def")]) + node3 = Template(wraptext("foo"), [pgenh("1", "bar"), pgens("abc", "def")]) + node4 = Template(wraptext("foo"), [pgenh("1", "bar"), pgenh("2", "baz")]) + node5 = Template( + wraptext("foo"), [pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")] + ) + node6 = Template( + wraptext("foo"), [pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")] + ) + node7 = Template( + wraptext("foo"), [pgens("1 ", "a"), pgens(" 1", "b"), pgens("2", "c")] + ) + node8 = Template( + wraptext("foo"), [pgens("1 ", "a"), pgens(" 1", "b"), pgens("2", "c")] + ) + node9 = Template( + wraptext("foo"), [pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")] + ) + node10 = Template( + wraptext("foo"), [pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")] + ) + node11 = Template( + wraptext("foo"), [pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")] + ) + node12 = Template( + wraptext("foo"), [pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")] + ) + node13 = Template( + wraptext("foo"), [pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")] + ) + node14 = Template( + wraptext("foo"), [pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")] + ) + node15 = Template( + wraptext("foo"), [pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")] + ) + node16 = Template( + wraptext("foo"), [pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")] + ) + node17 = Template( + wraptext("foo"), [pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")] + ) + node18 = Template( + wraptext("foo"), [pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")] + ) + node19 = Template( + wraptext("foo"), [pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")] + ) + node20 = Template( + wraptext("foo"), [pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")] + ) + node21 = Template( + wraptext("foo"), + [ + pgens("a", "b"), + pgens("c", "d"), + pgens("e", "f"), + pgens("a", "b"), + pgens("a", "b"), + ], + ) + node22 = Template( + wraptext("foo"), + [ + pgens("a", "b"), + 
pgens("c", "d"), + pgens("e", "f"), + pgens("a", "b"), + pgens("a", "b"), + ], + ) + node23 = Template( + wraptext("foo"), + [ + pgens("a", "b"), + pgens("c", "d"), + pgens("e", "f"), + pgens("a", "b"), + pgens("a", "b"), + ], + ) + node24 = Template( + wraptext("foo"), + [ + pgens("a", "b"), + pgens("c", "d"), + pgens("e", "f"), + pgens("a", "b"), + pgens("a", "b"), + ], + ) + node25 = Template( + wraptext("foo"), + [ + pgens("a", "b"), + pgens("c", "d"), + pgens("e", "f"), + pgens("a", "b"), + pgens("a", "b"), + ], + ) + node26 = Template( + wraptext("foo"), + [ + pgens("a", "b"), + pgens("c", "d"), + pgens("e", "f"), + pgens("a", "b"), + pgens("a", "b"), + ], + ) node27 = Template(wraptext("foo"), [pgenh("1", "bar")]) node28 = Template(wraptext("foo"), [pgenh("1", "bar")]) @@ -444,12 +563,14 @@ def test_remove(): with pytest.raises(ValueError): node27.remove(node28.get(1)) + def test_formatting(): """test realistic param manipulation with complex whitespace formatting (assumes that parsing works correctly)""" tests = [ - # https://en.wikipedia.org/w/index.php?title=Lamar_County,_Georgia&oldid=792356004 - ("""{{Infobox U.S. county + # https://en.wikipedia.org/w/index.php?title=Lamar_County,_Georgia&oldid=792356004 + ( + """{{Infobox U.S. county | county = Lamar County | state = Georgia | seal = @@ -471,16 +592,17 @@ def test_formatting(): | district = 3rd | named for = [[Lucius Quintus Cincinnatus Lamar II]] }}""", - """@@ -11,4 +11,4 @@ + """@@ -11,4 +11,4 @@ | area percentage = 1.3% -| census yr = 2010 -| pop = 18317 +| census estimate yr = 2016 +| pop = 12345example ref - | density_sq_mi = 100"""), - - # https://en.wikipedia.org/w/index.php?title=Rockdale_County,_Georgia&oldid=792359760 - ("""{{Infobox U.S. County| + | density_sq_mi = 100""", + ), + # https://en.wikipedia.org/w/index.php?title=Rockdale_County,_Georgia&oldid=792359760 + ( + """{{Infobox U.S. County| county = Rockdale County | state = Georgia | seal = | @@ -500,16 +622,17 @@ def test_formatting(): | district = 4th | time zone= Eastern }}""", - """@@ -11,4 +11,4 @@ + """@@ -11,4 +11,4 @@ area percentage = 1.7% | - census yr = 2010| - pop = 85215 | + census estimate yr = 2016 | + pop = 12345example ref | - density_sq_mi = 657 |"""), - - # https://en.wikipedia.org/w/index.php?title=Spalding_County,_Georgia&oldid=792360413 - ("""{{Infobox U.S. County| + density_sq_mi = 657 |""", + ), + # https://en.wikipedia.org/w/index.php?title=Spalding_County,_Georgia&oldid=792360413 + ( + """{{Infobox U.S. County| | county = Spalding County | | state = Georgia | | seal = | @@ -530,16 +653,17 @@ def test_formatting(): | district = 3rd | time zone = Eastern }}""", - """@@ -11,4 +11,4 @@ + """@@ -11,4 +11,4 @@ | area percentage = 1.6% | -| census yr = 2010| -| pop = 64073 | +| +| census estimate yr = 2016 | pop = 12345example ref | - | density_sq_mi = 326 |"""), - - # https://en.wikipedia.org/w/index.php?title=Clinton_County,_Illinois&oldid=794694648 - ("""{{Infobox U.S. county + | density_sq_mi = 326 |""", + ), + # https://en.wikipedia.org/w/index.php?title=Clinton_County,_Illinois&oldid=794694648 + ( + """{{Infobox U.S. 
county |county = Clinton County |state = Illinois | ex image = File:Clinton County Courthouse, Carlyle.jpg @@ -560,16 +684,17 @@ def test_formatting(): |web = www.clintonco.illinois.gov | district = 15th }}""", - """@@ -15,4 +15,4 @@ + """@@ -15,4 +15,4 @@ |area percentage = 5.8% - |census yr = 2010 - |pop = 37762 + |census estimate yr = 2016 + |pop = 12345example ref - |density_sq_mi = 80"""), - - # https://en.wikipedia.org/w/index.php?title=Winnebago_County,_Illinois&oldid=789193800 - ("""{{Infobox U.S. county | + |density_sq_mi = 80""", + ), + # https://en.wikipedia.org/w/index.php?title=Winnebago_County,_Illinois&oldid=789193800 + ( + """{{Infobox U.S. county | county = Winnebago County | state = Illinois | seal = Winnebago County il seal.png | @@ -590,19 +715,21 @@ def test_formatting(): | district = 16th | district2 = 17th }}""", - """@@ -11,4 +11,4 @@ + """@@ -11,4 +11,4 @@ area percentage = 1.1% | - census yr = 2010| - pop = 295266 | + census estimate yr = 2016| + pop = 12345example ref | - density_sq_mi = 575""")] + density_sq_mi = 575""", + ), + ] for (original, expected) in tests: code = parse(original) template = code.filter_templates()[0] template.add("pop", "12345example ref") - template.add('census estimate yr', "2016", before="pop") + template.add("census estimate yr", "2016", before="pop") template.remove("census yr") oldlines = original.splitlines(True) diff --git a/tests/test_text.py b/tests/test_text.py index 10d6019..908a7b3 100644 --- a/tests/test_text.py +++ b/tests/test_text.py @@ -26,6 +26,7 @@ import pytest from mwparserfromhell.nodes import Text + def test_str(): """test Text.__str__()""" node = Text("foobar") @@ -33,6 +34,7 @@ def test_str(): node2 = Text("fóóbar") assert "fóóbar" == str(node2) + def test_children(): """test Text.__children__()""" node = Text("foobar") @@ -40,11 +42,13 @@ def test_children(): with pytest.raises(StopIteration): next(gen) + def test_strip(): """test Text.__strip__()""" node = Text("foobar") assert node is node.__strip__() + def test_showtree(): """test Text.__showtree__()""" output = [] @@ -57,6 +61,7 @@ def test_showtree(): res = ["foobar", r"f\xf3\xf3bar", "\\U00010332\\U0001033f\\U00010344"] assert res == output + def test_value(): """test getter/setter for the value attribute""" node = Text("foobar") diff --git a/tests/test_tokenizer.py b/tests/test_tokenizer.py index 0c09883..f7cb9b0 100644 --- a/tests/test_tokenizer.py +++ b/tests/test_tokenizer.py @@ -33,29 +33,32 @@ try: except ImportError: CTokenizer = None + class _TestParseError(Exception): """Raised internally when a test could not be parsed.""" + def _parse_test(test, data): """Parse an individual *test*, storing its info in *data*.""" for line in test.strip().splitlines(): if line.startswith("name:"): - data["name"] = line[len("name:"):].strip() + data["name"] = line[len("name:") :].strip() elif line.startswith("label:"): - data["label"] = line[len("label:"):].strip() + data["label"] = line[len("label:") :].strip() elif line.startswith("input:"): - raw = line[len("input:"):].strip() + raw = line[len("input:") :].strip() if raw[0] == '"' and raw[-1] == '"': raw = raw[1:-1] raw = raw.encode("raw_unicode_escape") data["input"] = raw.decode("unicode_escape") elif line.startswith("output:"): - raw = line[len("output:"):].strip() + raw = line[len("output:") :].strip() try: data["output"] = eval(raw, vars(tokens)) except Exception as err: raise _TestParseError(err) from err + def _load_tests(filename, name, text): """Load all tests in *text* from the file *filename*.""" 
tests = text.split("\n---\n") @@ -77,15 +80,18 @@ def _load_tests(filename, name, text): warnings.warn(error.format(filename)) continue if data["input"] is None or data["output"] is None: - error = "Test '{}' in '{}' was ignored because it lacked an input or an output" + error = ( + "Test '{}' in '{}' was ignored because it lacked an input or an output" + ) warnings.warn(error.format(data["name"], filename)) continue # Include test filename in name - data['name'] = '{}:{}'.format(name, data['name']) + data["name"] = "{}:{}".format(name, data["name"]) yield data + def build(): """Load and install all tests from the 'tokenizer' directory.""" directory = path.join(path.dirname(__file__), "tokenizer") @@ -96,31 +102,37 @@ def build(): fullname = path.join(directory, filename) with codecs.open(fullname, "r", encoding="utf8") as fp: text = fp.read() - name = path.split(fullname)[1][:-len(extension)] + name = path.split(fullname)[1][: -len(extension)] yield from _load_tests(fullname, name, text) -@pytest.mark.parametrize("tokenizer", filter(None, ( - CTokenizer, PyTokenizer -)), ids=lambda t: 'CTokenizer' if t.USES_C else 'PyTokenizer') -@pytest.mark.parametrize("data", build(), ids=lambda data: data['name']) + +@pytest.mark.parametrize( + "tokenizer", + filter(None, (CTokenizer, PyTokenizer)), + ids=lambda t: "CTokenizer" if t.USES_C else "PyTokenizer", +) +@pytest.mark.parametrize("data", build(), ids=lambda data: data["name"]) def test_tokenizer(tokenizer, data): expected = data["output"] actual = tokenizer().tokenize(data["input"]) assert expected == actual -@pytest.mark.parametrize("data", build(), ids=lambda data: data['name']) + +@pytest.mark.parametrize("data", build(), ids=lambda data: data["name"]) def test_roundtrip(data): expected = data["input"] actual = str(Builder().build(data["output"][:])) assert expected == actual -@pytest.mark.skipif(CTokenizer is None, reason='CTokenizer not available') + +@pytest.mark.skipif(CTokenizer is None, reason="CTokenizer not available") def test_c_tokenizer_uses_c(): """make sure the C tokenizer identifies as using a C extension""" assert CTokenizer.USES_C is True assert CTokenizer().USES_C is True + def test_describe_context(): assert "" == contexts.describe(0) - ctx = contexts.describe(contexts.TEMPLATE_PARAM_KEY|contexts.HAS_TEXT) + ctx = contexts.describe(contexts.TEMPLATE_PARAM_KEY | contexts.HAS_TEXT) assert "TEMPLATE_PARAM_KEY|HAS_TEXT" == ctx diff --git a/tests/test_tokens.py b/tests/test_tokens.py index 9600165..f4b2b30 100644 --- a/tests/test_tokens.py +++ b/tests/test_tokens.py @@ -26,6 +26,7 @@ import pytest from mwparserfromhell.parser import tokens + @pytest.mark.parametrize("name", tokens.__all__) def test_issubclass(name): """check that all classes within the tokens module are really Tokens""" @@ -34,6 +35,7 @@ def test_issubclass(name): assert isinstance(klass(), klass) assert isinstance(klass(), tokens.Token) + def test_attributes(): """check that Token attributes can be managed properly""" token1 = tokens.Token() @@ -54,6 +56,7 @@ def test_attributes(): with pytest.raises(KeyError): token2.__delattr__("baz") + def test_repr(): """check that repr() on a Token works as expected""" token1 = tokens.Token() @@ -65,6 +68,7 @@ def test_repr(): assert repr(token2) in ("Token(foo='bar', baz=123)", "Token(baz=123, foo='bar')") assert "Text(text='" + hundredchars + "')" == repr(token3) + def test_equality(): """check that equivalent tokens are considered equal""" token1 = tokens.Token() @@ -83,11 +87,11 @@ def test_equality(): assert token4 
!= token6 assert token5 != token6 -@pytest.mark.parametrize("token", [ - tokens.Token(), - tokens.Token(foo="bar", baz=123), - tokens.Text(text="earwig") -]) + +@pytest.mark.parametrize( + "token", + [tokens.Token(), tokens.Token(foo="bar", baz=123), tokens.Text(text="earwig")], +) def test_repr_equality(token): """check that eval(repr(token)) == token""" assert token == eval(repr(token), vars(tokens)) diff --git a/tests/test_utils.py b/tests/test_utils.py index 2116ff6..49964f6 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -28,28 +28,33 @@ from mwparserfromhell.nodes import Template, Text from mwparserfromhell.utils import parse_anything from .conftest import assert_wikicode_equal, wrap, wraptext -@pytest.mark.parametrize("test,valid", [ - (wraptext("foobar"), wraptext("foobar")), - (Template(wraptext("spam")), wrap([Template(wraptext("spam"))])), - ("fóóbar", wraptext("fóóbar")), - (b"foob\xc3\xa1r", wraptext("foobár")), - (123, wraptext("123")), - (True, wraptext("True")), - (None, wrap([])), - ([Text("foo"), Text("bar"), Text("baz")], - wraptext("foo", "bar", "baz")), - ([wraptext("foo"), Text("bar"), "baz", 123, 456], - wraptext("foo", "bar", "baz", "123", "456")), - ([[[([[((("foo",),),)], "bar"],)]]], wraptext("foo", "bar")) -]) + +@pytest.mark.parametrize( + "test,valid", + [ + (wraptext("foobar"), wraptext("foobar")), + (Template(wraptext("spam")), wrap([Template(wraptext("spam"))])), + ("fóóbar", wraptext("fóóbar")), + (b"foob\xc3\xa1r", wraptext("foobár")), + (123, wraptext("123")), + (True, wraptext("True")), + (None, wrap([])), + ([Text("foo"), Text("bar"), Text("baz")], wraptext("foo", "bar", "baz")), + ( + [wraptext("foo"), Text("bar"), "baz", 123, 456], + wraptext("foo", "bar", "baz", "123", "456"), + ), + ([[[([[((("foo",),),)], "bar"],)]]], wraptext("foo", "bar")), + ], +) def test_parse_anything_valid(test, valid): """tests for valid input to utils.parse_anything()""" assert_wikicode_equal(valid, parse_anything(test)) -@pytest.mark.parametrize("invalid", [ - Ellipsis, object, object(), type, - ["foo", [object]] -]) + +@pytest.mark.parametrize( + "invalid", [Ellipsis, object, object(), type, ["foo", [object]]] +) def test_parse_anything_invalid(invalid): """tests for invalid input to utils.parse_anything()""" with pytest.raises(ValueError): diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index 990f28b..ce624d7 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -34,6 +34,7 @@ from mwparserfromhell.wikicode import Wikicode from mwparserfromhell import parse from .conftest import wrap, wraptext + def test_str(): """test Wikicode.__str__()""" code1 = parse("foobar") @@ -41,6 +42,7 @@ def test_str(): assert "foobar" == str(code1) assert "Have a {{template}} and a [[page|link]]" == str(code2) + def test_nodes(): """test getter/setter for the nodes attribute""" code = parse("Have a {{template}}") @@ -57,6 +59,7 @@ def test_nodes(): with pytest.raises(ValueError): code.__setattr__("nodes", object) + def test_get(): """test Wikicode.get()""" code = parse("Have a {{template}} and a [[page|link]]") @@ -65,6 +68,7 @@ def test_get(): with pytest.raises(IndexError): code.get(4) + def test_set(): """test Wikicode.set()""" code = parse("Have a {{template}} and a [[page|link]]") @@ -82,6 +86,7 @@ def test_set(): with pytest.raises(IndexError): code.set(-4, "{{baz}}") + def test_contains(): """test Wikicode.contains()""" code = parse("Here is {{aaa|{{bbb|xyz{{ccc}}}}}} and a [[page|link]]") @@ -93,6 +98,7 @@ def test_contains(): assert 
code.contains(str(tmpl4)) is True assert code.contains(tmpl2.params[0].value) is True + def test_index(): """test Wikicode.index()""" code = parse("Have a {{template}} and a [[page|link]]") @@ -105,13 +111,13 @@ def test_index(): code = parse("{{foo}}{{bar|{{baz}}}}") assert 1 == code.index("{{bar|{{baz}}}}") assert 1 == code.index("{{baz}}", recursive=True) - assert 1 == code.index(code.get(1).get(1).value, - recursive=True) + assert 1 == code.index(code.get(1).get(1).value, recursive=True) with pytest.raises(ValueError): code.index("{{baz}}", recursive=False) with pytest.raises(ValueError): code.index(code.get(1).get(1).value, recursive=False) + def test_get_ancestors_parent(): """test Wikicode.get_ancestors() and Wikicode.get_parent()""" code = parse("{{a|{{b|{{d|{{e}}{{f}}}}{{g}}}}}}{{c}}") @@ -130,6 +136,7 @@ def test_get_ancestors_parent(): with pytest.raises(ValueError): code.get_parent(fake) + def test_insert(): """test Wikicode.insert()""" code = parse("Have a {{template}} and a [[page|link]]") @@ -144,14 +151,22 @@ def test_insert(): code2 = parse("{{foo}}{{bar}}{{baz}}") code2.insert(1, "abc{{def}}ghi[[jk]]") assert "{{foo}}abc{{def}}ghi[[jk]]{{bar}}{{baz}}" == code2 - assert ["{{foo}}", "abc", "{{def}}", "ghi", "[[jk]]", - "{{bar}}", "{{baz}}"] == code2.nodes + assert [ + "{{foo}}", + "abc", + "{{def}}", + "ghi", + "[[jk]]", + "{{bar}}", + "{{baz}}", + ] == code2.nodes code3 = parse("{{foo}}bar") code3.insert(1000, "[[baz]]") code3.insert(-1000, "derp") assert "derp{{foo}}bar[[baz]]" == code3 + def _test_search(meth, expected): """Base test for insert_before(), insert_after(), and replace().""" code = parse("{{a}}{{b}}{{c}}{{d}}{{e}}") @@ -249,6 +264,7 @@ def _test_search(meth, expected): meth(code9, code9.get_sections()[0], "{{quz}}") assert expected[8] == code9 + def test_insert_before(): """test Wikicode.insert_before()""" meth = lambda code, *args, **kw: code.insert_before(*args, **kw) @@ -265,6 +281,7 @@ def test_insert_before(): ] _test_search(meth, expected) + def test_insert_after(): """test Wikicode.insert_after()""" meth = lambda code, *args, **kw: code.insert_after(*args, **kw) @@ -281,6 +298,7 @@ def test_insert_after(): ] _test_search(meth, expected) + def test_replace(): """test Wikicode.replace()""" meth = lambda code, *args, **kw: code.replace(*args, **kw) @@ -297,6 +315,7 @@ def test_replace(): ] _test_search(meth, expected) + def test_append(): """test Wikicode.append()""" code = parse("Have a {{template}}") @@ -310,6 +329,7 @@ def test_append(): with pytest.raises(ValueError): code.append(slice(0, 1)) + def test_remove(): """test Wikicode.remove()""" meth = lambda code, obj, value, **kw: code.remove(obj, **kw) @@ -326,6 +346,7 @@ def test_remove(): ] _test_search(meth, expected) + def test_matches(): """test Wikicode.matches()""" code1 = parse("Cleanup") @@ -357,17 +378,32 @@ def test_matches(): assert code5.matches("") is True assert code5.matches(("a", "b", "")) is True + def test_filter_family(): """test the Wikicode.i?filter() family of functions""" + def genlist(gen): assert isinstance(gen, GeneratorType) return list(gen) + ifilter = lambda code: (lambda *a, **k: genlist(code.ifilter(*a, **k))) code = parse("a{{b}}c[[d]]{{{e}}}{{f}}[[g]]") for func in (code.filter, ifilter(code)): - assert ["a", "{{b}}", "b", "c", "[[d]]", "d", "{{{e}}}", - "e", "{{f}}", "f", "[[g]]", "g"] == func() + assert [ + "a", + "{{b}}", + "b", + "c", + "[[d]]", + "d", + "{{{e}}}", + "e", + "{{f}}", + "f", + "[[g]]", + "g", + ] == func() assert ["{{{e}}}"] == 
func(forcetype=Argument) assert code.get(4) is func(forcetype=Argument)[0] assert list("abcdefg") == func(forcetype=Text) @@ -377,7 +413,7 @@ def test_filter_family(): funcs = [ lambda name, **kw: getattr(code, "filter_" + name)(**kw), - lambda name, **kw: genlist(getattr(code, "ifilter_" + name)(**kw)) + lambda name, **kw: genlist(getattr(code, "ifilter_" + name)(**kw)), ] for get_filter in funcs: assert ["{{{e}}}"] == get_filter("arguments") @@ -393,27 +429,35 @@ def test_filter_family(): code2 = parse("{{a|{{b}}|{{c|d={{f}}{{h}}}}}}") for func in (code2.filter, ifilter(code2)): - assert ["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}"] \ - == func(recursive=False, forcetype=Template) - assert ["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}", "{{b}}", - "{{c|d={{f}}{{h}}}}", "{{f}}", "{{h}}"] \ - == func(recursive=True, forcetype=Template) + assert ["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}"] == func( + recursive=False, forcetype=Template + ) + assert [ + "{{a|{{b}}|{{c|d={{f}}{{h}}}}}}", + "{{b}}", + "{{c|d={{f}}{{h}}}}", + "{{f}}", + "{{h}}", + ] == func(recursive=True, forcetype=Template) code3 = parse("{{foobar}}{{FOO}}{{baz}}{{bz}}{{barfoo}}") for func in (code3.filter, ifilter(code3)): - assert ["{{foobar}}", "{{barfoo}}"] \ - == func(False, matches=lambda node: "foo" in node) - assert ["{{foobar}}", "{{FOO}}", "{{barfoo}}"] \ - == func(False, matches=r"foo") - assert ["{{foobar}}", "{{FOO}}"] \ - == func(matches=r"^{{foo.*?}}") - assert ["{{foobar}}"] \ - == func(matches=r"^{{foo.*?}}", flags=re.UNICODE) + assert ["{{foobar}}", "{{barfoo}}"] == func( + False, matches=lambda node: "foo" in node + ) + assert ["{{foobar}}", "{{FOO}}", "{{barfoo}}"] == func(False, matches=r"foo") + assert ["{{foobar}}", "{{FOO}}"] == func(matches=r"^{{foo.*?}}") + assert ["{{foobar}}"] == func(matches=r"^{{foo.*?}}", flags=re.UNICODE) assert ["{{baz}}", "{{bz}}"] == func(matches=r"^{{b.*?z") assert ["{{baz}}"] == func(matches=r"^{{b.+?z}}") - exp_rec = ["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}", "{{b}}", - "{{c|d={{f}}{{h}}}}", "{{f}}", "{{h}}"] + exp_rec = [ + "{{a|{{b}}|{{c|d={{f}}{{h}}}}}}", + "{{b}}", + "{{c|d={{f}}{{h}}}}", + "{{f}}", + "{{h}}", + ] exp_unrec = ["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}"] assert exp_rec == code2.filter_templates() assert exp_unrec == code2.filter_templates(recursive=False) @@ -422,9 +466,9 @@ def test_filter_family(): assert exp_unrec == code2.filter_templates(False) assert ["{{foobar}}"] == code3.filter_templates( - matches=lambda node: node.name.matches("Foobar")) - assert ["{{baz}}", "{{bz}}"] \ - == code3.filter_templates(matches=r"^{{b.*?z") + matches=lambda node: node.name.matches("Foobar") + ) + assert ["{{baz}}", "{{bz}}"] == code3.filter_templates(matches=r"^{{b.*?z") assert [] == code3.filter_tags(matches=r"^{{b.*?z") assert [] == code3.filter_tags(matches=r"^{{b.*?z", flags=0) with pytest.raises(TypeError): @@ -440,6 +484,7 @@ def test_filter_family(): assert ["{{foo}}", "{{foo|{{bar}}}}"] == actual1 assert ["{{foo}}", "{{foo|{{bar}}}}"] == actual2 + def test_get_sections(): """test Wikicode.get_sections()""" page1 = parse("") @@ -461,44 +506,70 @@ def test_get_sections(): assert [""] == page1.get_sections() assert ["", "==Heading=="] == page2.get_sections() - assert ["", "===Heading===\nFoo bar baz\n====Gnidaeh====\n", "====Gnidaeh====\n"] \ - == page3.get_sections() - assert [p4_lead, p4_I, p4_IA, p4_IB, p4_IB1, p4_II, - p4_III, p4_IIIA, p4_IIIA1a, p4_IIIA2, p4_IIIA2ai1] \ - == page4.get_sections() + assert [ + "", + "===Heading===\nFoo bar baz\n====Gnidaeh====\n", + "====Gnidaeh====\n", + ] == 
page3.get_sections() + assert [ + p4_lead, + p4_I, + p4_IA, + p4_IB, + p4_IB1, + p4_II, + p4_III, + p4_IIIA, + p4_IIIA1a, + p4_IIIA2, + p4_IIIA2ai1, + ] == page4.get_sections() assert ["====Gnidaeh====\n"] == page3.get_sections(levels=[4]) - assert ["===Heading===\nFoo bar baz\n====Gnidaeh====\n"] \ - == page3.get_sections(levels=(2, 3)) - assert ["===Heading===\nFoo bar baz\n"] \ - == page3.get_sections(levels=(2, 3), flat=True) + assert ["===Heading===\nFoo bar baz\n====Gnidaeh====\n"] == page3.get_sections( + levels=(2, 3) + ) + assert ["===Heading===\nFoo bar baz\n"] == page3.get_sections( + levels=(2, 3), flat=True + ) assert [] == page3.get_sections(levels=[0]) - assert ["", "====Gnidaeh====\n"] == page3.get_sections(levels=[4], include_lead=True) - assert ["===Heading===\nFoo bar baz\n====Gnidaeh====\n", - "====Gnidaeh====\n"] == page3.get_sections(include_lead=False) - assert ["===Heading===\nFoo bar baz\n", "====Gnidaeh====\n"] \ - == page3.get_sections(flat=True, include_lead=False) + assert ["", "====Gnidaeh====\n"] == page3.get_sections( + levels=[4], include_lead=True + ) + assert [ + "===Heading===\nFoo bar baz\n====Gnidaeh====\n", + "====Gnidaeh====\n", + ] == page3.get_sections(include_lead=False) + assert ["===Heading===\nFoo bar baz\n", "====Gnidaeh====\n"] == page3.get_sections( + flat=True, include_lead=False + ) assert [p4_IB1, p4_IIIA2] == page4.get_sections(levels=[4]) assert [p4_IA, p4_IB, p4_IIIA] == page4.get_sections(levels=[3]) - assert [p4_IA, "=== Section I.B ===\n", - "=== Section III.A ===\nText.\n"] \ - == page4.get_sections(levels=[3], flat=True) + assert [ + p4_IA, + "=== Section I.B ===\n", + "=== Section III.A ===\nText.\n", + ] == page4.get_sections(levels=[3], flat=True) assert ["", ""] == page2.get_sections(include_headings=False) - assert ["\nSection I.B.1 body.\n\n•Some content.\n\n", - "\nEven more text.\n" + p4_IIIA2ai1] \ - == page4.get_sections(levels=[4], include_headings=False) + assert [ + "\nSection I.B.1 body.\n\n•Some content.\n\n", + "\nEven more text.\n" + p4_IIIA2ai1, + ] == page4.get_sections(levels=[4], include_headings=False) assert [] == page4.get_sections(matches=r"body") - assert [p4_I, p4_IA, p4_IB, p4_IB1] \ - == page4.get_sections(matches=r"Section\sI[.\s].*?") - assert [p4_IA, p4_IIIA, p4_IIIA1a, p4_IIIA2, p4_IIIA2ai1] \ - == page4.get_sections(matches=r".*?a.*?") - assert [p4_IIIA1a, p4_IIIA2ai1] \ - == page4.get_sections(matches=r".*?a.*?", flags=re.U) - assert ["\nMore text.\n", "\nAn invalid section!"] \ - == page4.get_sections(matches=r".*?a.*?", flags=re.U, - include_headings=False) + assert [p4_I, p4_IA, p4_IB, p4_IB1] == page4.get_sections( + matches=r"Section\sI[.\s].*?" + ) + assert [p4_IA, p4_IIIA, p4_IIIA1a, p4_IIIA2, p4_IIIA2ai1] == page4.get_sections( + matches=r".*?a.*?" 
+ ) + assert [p4_IIIA1a, p4_IIIA2ai1] == page4.get_sections( + matches=r".*?a.*?", flags=re.U + ) + assert ["\nMore text.\n", "\nAn invalid section!"] == page4.get_sections( + matches=r".*?a.*?", flags=re.U, include_headings=False + ) sections = page2.get_sections(include_headings=False) sections[0].append("Lead!\n") @@ -512,22 +583,22 @@ def test_get_sections(): assert "== Foo ==\nBarf {{Haha}}\n" == section assert "X\n== Foo ==\nBarf {{Haha}}\n== Baz ==\nBuzz" == page5 + def test_strip_code(): """test Wikicode.strip_code()""" # Since individual nodes have test cases for their __strip__ methods, # we're only going to do an integration test: code = parse("Foo [[bar]]\n\n{{baz|hello}}\n\n[[a|b]] Σ") - assert "Foo bar\n\nb Σ" \ - == code.strip_code(normalize=True, collapse=True) - assert "Foo bar\n\n\n\nb Σ" \ - == code.strip_code(normalize=True, collapse=False) - assert "Foo bar\n\nb Σ" \ - == code.strip_code(normalize=False, collapse=True) - assert "Foo bar\n\n\n\nb Σ" \ - == code.strip_code(normalize=False, collapse=False) - assert "Foo bar\n\nhello\n\nb Σ" \ - == code.strip_code(normalize=True, collapse=True, - keep_template_params=True) + assert "Foo bar\n\nb Σ" == code.strip_code(normalize=True, collapse=True) + assert "Foo bar\n\n\n\nb Σ" == code.strip_code(normalize=True, collapse=False) + assert "Foo bar\n\nb Σ" == code.strip_code(normalize=False, collapse=True) + assert "Foo bar\n\n\n\nb Σ" == code.strip_code( + normalize=False, collapse=False + ) + assert "Foo bar\n\nhello\n\nb Σ" == code.strip_code( + normalize=True, collapse=True, keep_template_params=True + ) + def test_get_tree(): """test Wikicode.get_tree()""" @@ -535,6 +606,8 @@ def test_get_tree(): # methods, and the docstring covers all possibilities for the output of # __showtree__, we'll test it only: code = parse("Lorem ipsum {{foo|bar|{{baz}}|spam=eggs}}") - expected = "Lorem ipsum \n{{\n\t foo\n\t| 1\n\t= bar\n\t| 2\n\t= " + \ - "{{\n\t\t\tbaz\n\t }}\n\t| spam\n\t= eggs\n}}" + expected = ( + "Lorem ipsum \n{{\n\t foo\n\t| 1\n\t= bar\n\t| 2\n\t= " + + "{{\n\t\t\tbaz\n\t }}\n\t| spam\n\t= eggs\n}}" + ) assert expected.expandtabs(4) == code.get_tree() diff --git a/tests/test_wikilink.py b/tests/test_wikilink.py index 1d5e66d..7eab55b 100644 --- a/tests/test_wikilink.py +++ b/tests/test_wikilink.py @@ -27,6 +27,7 @@ import pytest from mwparserfromhell.nodes import Text, Wikilink from .conftest import assert_wikicode_equal, wrap, wraptext + def test_str(): """test Wikilink.__str__()""" node = Wikilink(wraptext("foobar")) @@ -34,6 +35,7 @@ def test_str(): node2 = Wikilink(wraptext("foo"), wraptext("bar")) assert "[[foo|bar]]" == str(node2) + def test_children(): """test Wikilink.__children__()""" node1 = Wikilink(wraptext("foobar")) @@ -48,6 +50,7 @@ def test_children(): with pytest.raises(StopIteration): next(gen2) + def test_strip(): """test Wikilink.__strip__()""" node = Wikilink(wraptext("foobar")) @@ -55,6 +58,7 @@ def test_strip(): assert "foobar" == node.__strip__() assert "bar" == node2.__strip__() + def test_showtree(): """test Wikilink.__showtree__()""" output = [] @@ -66,10 +70,19 @@ def test_showtree(): node1.__showtree__(output.append, get, mark) node2.__showtree__(output.append, get, mark) valid = [ - "[[", (getter, node1.title), "]]", "[[", (getter, node2.title), - " | ", marker, (getter, node2.text), "]]"] + "[[", + (getter, node1.title), + "]]", + "[[", + (getter, node2.title), + " | ", + marker, + (getter, node2.text), + "]]", + ] assert valid == output + def test_title(): """test getter/setter for the 
title attribute""" title = wraptext("foobar") @@ -82,6 +95,7 @@ def test_title(): assert_wikicode_equal(wraptext("héhehé"), node1.title) assert_wikicode_equal(wraptext("héhehé"), node2.title) + def test_text(): """test getter/setter for the text attribute""" text = wraptext("baz") From c7d1c3d660233c02d1c0d33c44836eba1830526c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 2 Sep 2021 02:04:52 -0400 Subject: [PATCH 6/7] Add pre-commit hooks --- .pre-commit-config.yaml | 9 +++ docs/conf.py | 146 ++++++++++++++++++++++++++---------------------- 2 files changed, 89 insertions(+), 66 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..daac892 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,9 @@ +repos: +- repo: https://github.com/psf/black + rev: 21.8b0 + hooks: + - id: black +- repo: https://github.com/doublify/pre-commit-clang-format + rev: 62302476d0da01515660132d76902359bed0f782 + hooks: + - id: clang-format diff --git a/docs/conf.py b/docs/conf.py index 7aae516..61d5b20 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -16,33 +16,33 @@ import sys, os # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -sys.path.insert(0, os.path.abspath('..')) +sys.path.insert(0, os.path.abspath("..")) import mwparserfromhell # -- General configuration ----------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' +# needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx', 'sphinx.ext.viewcode'] +extensions = ["sphinx.ext.autodoc", "sphinx.ext.intersphinx", "sphinx.ext.viewcode"] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix of source filenames. -source_suffix = '.rst' +source_suffix = ".rst" # The encoding of source files. -#source_encoding = 'utf-8-sig' +# source_encoding = 'utf-8-sig' # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = u'mwparserfromhell' -copyright = u'2012–2021 Ben Kurtovic' +project = "mwparserfromhell" +copyright = "2012–2021 Ben Kurtovic" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -55,158 +55,161 @@ release = mwparserfromhell.__version__ # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. -#language = None +# language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ['_build'] +exclude_patterns = ["_build"] # The reST default role (used for this markup: `text`) to use for all documents. 
-#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). -#add_module_names = True +# add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. -#show_authors = False +# show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] +# modindex_common_prefix = [] # -- Options for HTML output --------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'nature' +html_theme = "nature" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -#html_theme_options = {} +# html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] +# html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". -#html_title = None +# html_title = None # A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None +# html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. -#html_logo = None +# html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -#html_favicon = None +# html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' +# html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -#html_use_smartypants = True +# html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -#html_sidebars = {} +# html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. -#html_additional_pages = {} +# html_additional_pages = {} # If false, no module index is generated. -#html_domain_indices = True +# html_domain_indices = True # If false, no index is generated. -#html_use_index = True +# html_use_index = True # If true, the index is split into individual pages for each letter. -#html_split_index = False +# html_split_index = False # If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True +# html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True +# html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 
-#html_show_copyright = True +# html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. -#html_use_opensearch = '' +# html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None +# html_file_suffix = None # Output file base name for HTML help builder. -htmlhelp_basename = 'mwparserfromhelldoc' +htmlhelp_basename = "mwparserfromhelldoc" # -- Options for LaTeX output -------------------------------------------------- latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', - -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', - -# Additional stuff for the LaTeX preamble. -#'preamble': '', + # The paper size ('letterpaper' or 'a4paper'). + #'papersize': 'letterpaper', + # The font size ('10pt', '11pt' or '12pt'). + #'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. + #'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ - ('index', 'mwparserfromhell.tex', u'mwparserfromhell Documentation', - u'Ben Kurtovic', 'manual'), + ( + "index", + "mwparserfromhell.tex", + "mwparserfromhell Documentation", + "Ben Kurtovic", + "manual", + ) ] # The name of an image file (relative to this directory) to place at the top of # the title page. -#latex_logo = None +# latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. -#latex_use_parts = False +# latex_use_parts = False # If true, show page references after internal links. -#latex_show_pagerefs = False +# latex_show_pagerefs = False # If true, show URL addresses after external links. -#latex_show_urls = False +# latex_show_urls = False # Documents to append as an appendix to all manuals. -#latex_appendices = [] +# latex_appendices = [] # If false, no module index is generated. -#latex_domain_indices = True +# latex_domain_indices = True # -- Options for manual page output -------------------------------------------- @@ -214,12 +217,17 @@ latex_documents = [ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ - ('index', 'mwparserfromhell', u'mwparserfromhell Documentation', - [u'Ben Kurtovic'], 1) + ( + "index", + "mwparserfromhell", + "mwparserfromhell Documentation", + ["Ben Kurtovic"], + 1, + ) ] # If true, show URL addresses after external links. -#man_show_urls = False +# man_show_urls = False # -- Options for Texinfo output ------------------------------------------------ @@ -228,20 +236,26 @@ man_pages = [ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - ('index', 'mwparserfromhell', u'mwparserfromhell Documentation', - u'Ben Kurtovic', 'mwparserfromhell', 'One line description of project.', - 'Miscellaneous'), + ( + "index", + "mwparserfromhell", + "mwparserfromhell Documentation", + "Ben Kurtovic", + "mwparserfromhell", + "One line description of project.", + "Miscellaneous", + ) ] # Documents to append as an appendix to all manuals. -#texinfo_appendices = [] +# texinfo_appendices = [] # If false, no module index is generated. 
-#texinfo_domain_indices = True +# texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' +# texinfo_show_urls = 'footnote' # Example configuration for intersphinx: refer to the Python standard library. -intersphinx_mapping = {'http://docs.python.org/': None} +intersphinx_mapping = {"http://docs.python.org/": None} From be4746f73dfa9514f06dd7f93bf4be9a47d5d490 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 2 Sep 2021 02:10:22 -0400 Subject: [PATCH 7/7] release/0.6.3 --- CHANGELOG | 2 +- appveyor.yml | 2 +- docs/changelog.rst | 8 ++++---- src/mwparserfromhell/__init__.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 09d14e7..012fe42 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,4 +1,4 @@ -v0.7 (unreleased): +v0.6.3 (released September 2, 2021): - Added Linux AArch64 wheels. (#276) - Fixed C integer conversion, manifesting as parsing errors on big-endian diff --git a/appveyor.yml b/appveyor.yml index 5f89a41..4dd77e4 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,6 +1,6 @@ # This config file is used by appveyor.com to build Windows release binaries -version: 0.7.dev0-b{build} +version: 0.6.3-b{build} branches: only: diff --git a/docs/changelog.rst b/docs/changelog.rst index 7fe93dc..5585f39 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,11 +1,11 @@ Changelog ========= -v0.7 ----- +v0.6.3 +------ -Unreleased -(`changes `__): +`Released September 2, 2021 `_ +(`changes `__): - Added Linux AArch64 wheels. (`#276 `_) diff --git a/src/mwparserfromhell/__init__.py b/src/mwparserfromhell/__init__.py index 6e65a7e..7c19a21 100644 --- a/src/mwparserfromhell/__init__.py +++ b/src/mwparserfromhell/__init__.py @@ -27,7 +27,7 @@ outrageously powerful parser for `MediaWiki `_ wikico __author__ = "Ben Kurtovic" __copyright__ = "Copyright (C) 2012-2021 Ben Kurtovic" __license__ = "MIT License" -__version__ = "0.7.dev0" +__version__ = "0.6.3" __email__ = "ben.kurtovic@gmail.com" from . import definitions, nodes, parser, smart_list, string_mixin, utils, wikicode
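
A minimal smoke-test sketch for the 0.6.3 release finalized above (the wikitext and parameter names here are illustrative, not taken from the patches, and the printed result assumes mwparserfromhell's usual formatting-preserving behavior). It checks the version bump from the release commit and exercises the same public API the reformatted tests rely on (parse, filter_templates, Template.add/remove):

import mwparserfromhell

# Confirm the version bump in the release/0.6.3 commit took effect.
assert mwparserfromhell.__version__ == "0.6.3"

# Mirror the manipulation pattern from tests/test_template.py (test_formatting):
# replace a value, insert a parameter before another, and remove one.
code = mwparserfromhell.parse("{{Infobox example|census yr=2010|pop=18317}}")
template = code.filter_templates()[0]
template.add("pop", "12345")                               # replace existing value
template.add("census estimate yr", "2016", before="pop")   # insert new parameter
template.remove("census yr")                               # drop the old one
print(code)  # expected: {{Infobox example|census estimate yr=2016|pop=12345}}

For the hooks added in the pre-commit patch, running `pip install pre-commit` followed by `pre-commit install` in a clone wires black and clang-format into git's commit hook (this is standard pre-commit behavior, not something the patch itself runs), and `pre-commit run --all-files` applies them to the whole tree.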