@@ -1,3 +1,10 @@ | |||||
v0.5.1 (released March 3, 2018): | |||||
- Improved behavior when adding parameters to templates (via Template.add()) | |||||
with poorly formatted whitespace conventions. (#185) | |||||
- Fixed the parser getting stuck in deeply nested HTML tags with unclosed, | |||||
quoted attributes. (#190) | |||||
v0.5 (released June 23, 2017): | v0.5 (released June 23, 2017): | ||||
- Added Wikicode.contains() to determine whether a Node or Wikicode object is | - Added Wikicode.contains() to determine whether a Node or Wikicode object is | ||||
@@ -1,4 +1,4 @@ | |||||
Copyright (C) 2012-2017 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Copyright (C) 2012-2018 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy | Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
of this software and associated documentation files (the "Software"), to deal | of this software and associated documentation files (the "Software"), to deal | ||||
@@ -1,6 +1,6 @@ | |||||
# This config file is used by appveyor.com to build Windows release binaries | # This config file is used by appveyor.com to build Windows release binaries | ||||
version: 0.5-b{build} | |||||
version: 0.5.1-b{build} | |||||
branches: | branches: | ||||
only: | only: | ||||
@@ -1,6 +1,19 @@ | |||||
Changelog | Changelog | ||||
========= | ========= | ||||
v0.5.1 | |||||
------ | |||||
`Released March 3, 2018 <https://github.com/earwig/mwparserfromhell/tree/v0.5.1>`_ | |||||
(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.5...v0.5.1>`__): | |||||
- Improved behavior when adding parameters to templates (via | |||||
:meth:`.Template.add`) with poorly formatted whitespace conventions. | |||||
(`#185 <https://github.com/earwig/mwparserfromhell/issues/185>`_) | |||||
- Fixed the parser getting stuck in deeply nested HTML tags with unclosed, | |||||
quoted attributes. | |||||
(`#190 <https://github.com/earwig/mwparserfromhell/issues/190>`_) | |||||
v0.5 | v0.5 | ||||
---- | ---- | ||||
@@ -42,7 +42,7 @@ master_doc = 'index' | |||||
# General information about the project. | # General information about the project. | ||||
project = u'mwparserfromhell' | project = u'mwparserfromhell' | ||||
copyright = u'2012, 2013, 2014, 2015, 2016, 2017 Ben Kurtovic' | |||||
copyright = u'2012–2018 Ben Kurtovic' | |||||
# The version info for the project you're documenting, acts as replacement for | # The version info for the project you're documenting, acts as replacement for | ||||
# |version| and |release|, also used in various other places throughout the | # |version| and |release|, also used in various other places throughout the | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2017 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2018 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -27,9 +27,9 @@ outrageously powerful parser for `MediaWiki <http://mediawiki.org>`_ wikicode. | |||||
""" | """ | ||||
__author__ = "Ben Kurtovic" | __author__ = "Ben Kurtovic" | ||||
__copyright__ = "Copyright (C) 2012, 2013, 2014, 2015, 2016 Ben Kurtovic" | |||||
__copyright__ = "Copyright (C) 2012-2018 Ben Kurtovic" | |||||
__license__ = "MIT License" | __license__ = "MIT License" | ||||
__version__ = "0.5" | |||||
__version__ = "0.5.1" | |||||
__email__ = "ben.kurtovic@gmail.com" | __email__ = "ben.kurtovic@gmail.com" | ||||
from . import (compat, definitions, nodes, parser, smart_list, string_mixin, | from . import (compat, definitions, nodes, parser, smart_list, string_mixin, | ||||
@@ -101,7 +101,7 @@ class Template(Node): | |||||
values = tuple(theories.values()) | values = tuple(theories.values()) | ||||
best = max(values) | best = max(values) | ||||
confidence = float(best) / sum(values) | confidence = float(best) / sum(values) | ||||
if confidence >= 0.75: | |||||
if confidence > 0.5: | |||||
return tuple(theories.keys())[values.index(best)] | return tuple(theories.keys())[values.index(best)] | ||||
@staticmethod | @staticmethod | ||||
@@ -130,6 +130,8 @@ class Template(Node): | |||||
before_theories = defaultdict(lambda: 0) | before_theories = defaultdict(lambda: 0) | ||||
after_theories = defaultdict(lambda: 0) | after_theories = defaultdict(lambda: 0) | ||||
for param in self.params: | for param in self.params: | ||||
if not param.showkey: | |||||
continue | |||||
if use_names: | if use_names: | ||||
component = str(param.name) | component = str(param.name) | ||||
else: | else: | ||||
@@ -1,5 +1,5 @@ | |||||
/* | /* | ||||
Copyright (C) 2012-2017 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Copyright (C) 2012-2018 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | Permission is hereby granted, free of charge, to any person obtaining a copy of | ||||
this software and associated documentation files (the "Software"), to deal in | this software and associated documentation files (the "Software"), to deal in | ||||
@@ -722,7 +722,6 @@ Tokenizer_remove_uri_scheme_from_textbuffer(Tokenizer* self, PyObject* link) | |||||
*/ | */ | ||||
static int Tokenizer_parse_external_link(Tokenizer* self, int brackets) | static int Tokenizer_parse_external_link(Tokenizer* self, int brackets) | ||||
{ | { | ||||
#define INVALID_CONTEXT self->topstack->context & AGG_NO_EXT_LINKS | |||||
#define NOT_A_LINK \ | #define NOT_A_LINK \ | ||||
if (!brackets && self->topstack->context & LC_DLTERM) \ | if (!brackets && self->topstack->context & LC_DLTERM) \ | ||||
return Tokenizer_handle_dl_term(self); \ | return Tokenizer_handle_dl_term(self); \ | ||||
@@ -732,7 +731,8 @@ static int Tokenizer_parse_external_link(Tokenizer* self, int brackets) | |||||
PyObject *link, *kwargs; | PyObject *link, *kwargs; | ||||
Textbuffer *extra; | Textbuffer *extra; | ||||
if (INVALID_CONTEXT || !(Tokenizer_CAN_RECURSE(self))) { | |||||
if (self->topstack->context & AGG_NO_EXT_LINKS || | |||||
!(Tokenizer_CAN_RECURSE(self))) { | |||||
NOT_A_LINK; | NOT_A_LINK; | ||||
} | } | ||||
extra = Textbuffer_new(&self->text); | extra = Textbuffer_new(&self->text); | ||||
@@ -1280,6 +1280,7 @@ static int Tokenizer_handle_tag_data( | |||||
else if (data->context & TAG_NOTE_SPACE) { | else if (data->context & TAG_NOTE_SPACE) { | ||||
if (data->context & TAG_QUOTED) { | if (data->context & TAG_QUOTED) { | ||||
data->context = TAG_ATTR_VALUE; | data->context = TAG_ATTR_VALUE; | ||||
Tokenizer_memoize_bad_route(self); | |||||
trash = Tokenizer_pop(self); | trash = Tokenizer_pop(self); | ||||
Py_XDECREF(trash); | Py_XDECREF(trash); | ||||
self->head = data->reset - 1; // Will be auto-incremented | self->head = data->reset - 1; // Will be auto-incremented | ||||
@@ -1317,7 +1318,12 @@ static int Tokenizer_handle_tag_data( | |||||
data->context |= TAG_QUOTED; | data->context |= TAG_QUOTED; | ||||
data->quoter = chunk; | data->quoter = chunk; | ||||
data->reset = self->head; | data->reset = self->head; | ||||
if (Tokenizer_push(self, self->topstack->context)) | |||||
if (Tokenizer_check_route(self, self->topstack->context) < 0) { | |||||
RESET_ROUTE(); | |||||
data->context = TAG_ATTR_VALUE; | |||||
self->head--; | |||||
} | |||||
else if (Tokenizer_push(self, self->topstack->context)) | |||||
return -1; | return -1; | ||||
return 0; | return 0; | ||||
} | } | ||||
@@ -1613,6 +1619,7 @@ static PyObject* Tokenizer_really_parse_tag(Tokenizer* self) | |||||
if (data->context & TAG_QUOTED) { | if (data->context & TAG_QUOTED) { | ||||
// Unclosed attribute quote: reset, don't die | // Unclosed attribute quote: reset, don't die | ||||
data->context = TAG_ATTR_VALUE; | data->context = TAG_ATTR_VALUE; | ||||
Tokenizer_memoize_bad_route(self); | |||||
trash = Tokenizer_pop(self); | trash = Tokenizer_pop(self); | ||||
Py_XDECREF(trash); | Py_XDECREF(trash); | ||||
self->head = data->reset; | self->head = data->reset; | ||||
@@ -2185,6 +2192,7 @@ static PyObject* Tokenizer_handle_table_style(Tokenizer* self, Unicode end_token | |||||
if (data->context & TAG_QUOTED) { | if (data->context & TAG_QUOTED) { | ||||
// Unclosed attribute quote: reset, don't die | // Unclosed attribute quote: reset, don't die | ||||
data->context = TAG_ATTR_VALUE; | data->context = TAG_ATTR_VALUE; | ||||
Tokenizer_memoize_bad_route(self); | |||||
trash = Tokenizer_pop(self); | trash = Tokenizer_pop(self); | ||||
Py_XDECREF(trash); | Py_XDECREF(trash); | ||||
self->head = data->reset; | self->head = data->reset; | ||||
@@ -1,5 +1,5 @@ | |||||
/* | /* | ||||
Copyright (C) 2012-2017 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Copyright (C) 2012-2018 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | Permission is hereby granted, free of charge, to any person obtaining a copy of | ||||
this software and associated documentation files (the "Software"), to deal in | this software and associated documentation files (the "Software"), to deal in | ||||
@@ -147,6 +147,22 @@ static int compare_nodes( | |||||
} | } | ||||
/* | /* | ||||
Remember that the current route (head + context at push) is invalid. | |||||
This will be noticed when calling Tokenizer_check_route with the same head | |||||
and context, and the route will be failed immediately. | |||||
*/ | |||||
void Tokenizer_memoize_bad_route(Tokenizer *self) | |||||
{ | |||||
route_tree_node *node = malloc(sizeof(route_tree_node)); | |||||
if (node) { | |||||
node->id = self->topstack->ident; | |||||
if (avl_tree_insert(&self->bad_routes, &node->node, compare_nodes)) | |||||
free(node); | |||||
} | |||||
} | |||||
/* | |||||
Fail the current tokenization route. Discards the current | Fail the current tokenization route. Discards the current | ||||
stack/context/textbuffer and sets the BAD_ROUTE flag. Also records the | stack/context/textbuffer and sets the BAD_ROUTE flag. Also records the | ||||
ident of the failed stack so future parsing attempts down this route can be | ident of the failed stack so future parsing attempts down this route can be | ||||
@@ -157,13 +173,7 @@ void* Tokenizer_fail_route(Tokenizer* self) | |||||
uint64_t context = self->topstack->context; | uint64_t context = self->topstack->context; | ||||
PyObject* stack; | PyObject* stack; | ||||
route_tree_node *node = malloc(sizeof(route_tree_node)); | |||||
if (node) { | |||||
node->id = self->topstack->ident; | |||||
if (avl_tree_insert(&self->bad_routes, &node->node, compare_nodes)) | |||||
free(node); | |||||
} | |||||
Tokenizer_memoize_bad_route(self); | |||||
stack = Tokenizer_pop(self); | stack = Tokenizer_pop(self); | ||||
Py_XDECREF(stack); | Py_XDECREF(stack); | ||||
FAIL_ROUTE(context); | FAIL_ROUTE(context); | ||||
@@ -173,7 +183,7 @@ void* Tokenizer_fail_route(Tokenizer* self) | |||||
/* | /* | ||||
Check if pushing a new route here with the given context would definitely | Check if pushing a new route here with the given context would definitely | ||||
fail, based on a previous call to Tokenizer_fail_route() with the same | fail, based on a previous call to Tokenizer_fail_route() with the same | ||||
stack. | |||||
stack. (Or any other call to Tokenizer_memoize_bad_route().) | |||||
Return 0 if safe and -1 if unsafe. The BAD_ROUTE flag will be set in the | Return 0 if safe and -1 if unsafe. The BAD_ROUTE flag will be set in the | ||||
latter case. | latter case. | ||||
@@ -1,5 +1,5 @@ | |||||
/* | /* | ||||
Copyright (C) 2012-2017 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Copyright (C) 2012-2018 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | Permission is hereby granted, free of charge, to any person obtaining a copy of | ||||
this software and associated documentation files (the "Software"), to deal in | this software and associated documentation files (the "Software"), to deal in | ||||
@@ -31,6 +31,7 @@ int Tokenizer_push_textbuffer(Tokenizer*); | |||||
void Tokenizer_delete_top_of_stack(Tokenizer*); | void Tokenizer_delete_top_of_stack(Tokenizer*); | ||||
PyObject* Tokenizer_pop(Tokenizer*); | PyObject* Tokenizer_pop(Tokenizer*); | ||||
PyObject* Tokenizer_pop_keeping_context(Tokenizer*); | PyObject* Tokenizer_pop_keeping_context(Tokenizer*); | ||||
void Tokenizer_memoize_bad_route(Tokenizer*); | |||||
void* Tokenizer_fail_route(Tokenizer*); | void* Tokenizer_fail_route(Tokenizer*); | ||||
int Tokenizer_check_route(Tokenizer*, uint64_t); | int Tokenizer_check_route(Tokenizer*, uint64_t); | ||||
void Tokenizer_free_bad_route_tree(Tokenizer*); | void Tokenizer_free_bad_route_tree(Tokenizer*); | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2017 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2018 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -144,6 +144,14 @@ class Tokenizer(object): | |||||
"""Return whether or not our max recursion depth has been exceeded.""" | """Return whether or not our max recursion depth has been exceeded.""" | ||||
return self._depth < self.MAX_DEPTH | return self._depth < self.MAX_DEPTH | ||||
def _memoize_bad_route(self): | |||||
"""Remember that the current route (head + context at push) is invalid. | |||||
This will be noticed when calling _push with the same head and context, | |||||
and the route will be failed immediately. | |||||
""" | |||||
self._bad_routes.add(self._stack_ident) | |||||
def _fail_route(self): | def _fail_route(self): | ||||
"""Fail the current tokenization route. | """Fail the current tokenization route. | ||||
@@ -151,7 +159,7 @@ class Tokenizer(object): | |||||
:exc:`.BadRoute`. | :exc:`.BadRoute`. | ||||
""" | """ | ||||
context = self._context | context = self._context | ||||
self._bad_routes.add(self._stack_ident) | |||||
self._memoize_bad_route() | |||||
self._pop() | self._pop() | ||||
raise BadRoute(context) | raise BadRoute(context) | ||||
@@ -506,12 +514,16 @@ class Tokenizer(object): | |||||
def _parse_external_link(self, brackets): | def _parse_external_link(self, brackets): | ||||
"""Parse an external link at the head of the wikicode string.""" | """Parse an external link at the head of the wikicode string.""" | ||||
if self._context & contexts.NO_EXT_LINKS or not self._can_recurse(): | |||||
if not brackets and self._context & contexts.DL_TERM: | |||||
self._handle_dl_term() | |||||
else: | |||||
self._emit_text(self._read()) | |||||
return | |||||
reset = self._head | reset = self._head | ||||
self._head += 1 | self._head += 1 | ||||
try: | try: | ||||
bad_context = self._context & contexts.NO_EXT_LINKS | |||||
if bad_context or not self._can_recurse(): | |||||
raise BadRoute() | |||||
link, extra, delta = self._really_parse_external_link(brackets) | link, extra, delta = self._really_parse_external_link(brackets) | ||||
except BadRoute: | except BadRoute: | ||||
self._head = reset | self._head = reset | ||||
@@ -719,6 +731,7 @@ class Tokenizer(object): | |||||
elif data.context & data.CX_NOTE_SPACE: | elif data.context & data.CX_NOTE_SPACE: | ||||
if data.context & data.CX_QUOTED: | if data.context & data.CX_QUOTED: | ||||
data.context = data.CX_ATTR_VALUE | data.context = data.CX_ATTR_VALUE | ||||
self._memoize_bad_route() | |||||
self._pop() | self._pop() | ||||
self._head = data.reset - 1 # Will be auto-incremented | self._head = data.reset - 1 # Will be auto-incremented | ||||
return # Break early | return # Break early | ||||
@@ -743,7 +756,13 @@ class Tokenizer(object): | |||||
data.context |= data.CX_QUOTED | data.context |= data.CX_QUOTED | ||||
data.quoter = chunk | data.quoter = chunk | ||||
data.reset = self._head | data.reset = self._head | ||||
self._push(self._context) | |||||
try: | |||||
self._push(self._context) | |||||
except BadRoute: | |||||
# Already failed to parse this as a quoted string | |||||
data.context = data.CX_ATTR_VALUE | |||||
self._head -= 1 | |||||
return | |||||
continue | continue | ||||
elif data.context & data.CX_QUOTED: | elif data.context & data.CX_QUOTED: | ||||
if chunk == data.quoter and not escaped: | if chunk == data.quoter and not escaped: | ||||
@@ -845,6 +864,7 @@ class Tokenizer(object): | |||||
if data.context & data.CX_QUOTED: | if data.context & data.CX_QUOTED: | ||||
# Unclosed attribute quote: reset, don't die | # Unclosed attribute quote: reset, don't die | ||||
data.context = data.CX_ATTR_VALUE | data.context = data.CX_ATTR_VALUE | ||||
self._memoize_bad_route() | |||||
self._pop() | self._pop() | ||||
self._head = data.reset | self._head = data.reset | ||||
continue | continue | ||||
@@ -1084,6 +1104,7 @@ class Tokenizer(object): | |||||
if data.context & data.CX_QUOTED: | if data.context & data.CX_QUOTED: | ||||
# Unclosed attribute quote: reset, don't die | # Unclosed attribute quote: reset, don't die | ||||
data.context = data.CX_ATTR_VALUE | data.context = data.CX_ATTR_VALUE | ||||
self._memoize_bad_route() | |||||
self._pop() | self._pop() | ||||
self._head = data.reset | self._head = data.reset | ||||
continue | continue | ||||
@@ -1,5 +1,7 @@ | |||||
#! /usr/bin/env bash | #! /usr/bin/env bash | ||||
set -euo pipefail | |||||
if [[ -z "$1" ]]; then | if [[ -z "$1" ]]; then | ||||
echo "usage: $0 1.2.3" | echo "usage: $0 1.2.3" | ||||
exit 1 | exit 1 | ||||
@@ -1,7 +1,7 @@ | |||||
#! /usr/bin/env python | #! /usr/bin/env python | ||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2018 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -107,6 +107,7 @@ setup( | |||||
"Programming Language :: Python :: 3.4", | "Programming Language :: Python :: 3.4", | ||||
"Programming Language :: Python :: 3.5", | "Programming Language :: Python :: 3.5", | ||||
"Programming Language :: Python :: 3.6", | "Programming Language :: Python :: 3.6", | ||||
"Programming Language :: Python :: 3.7", | |||||
"Topic :: Text Processing :: Markup" | "Topic :: Text Processing :: Markup" | ||||
], | ], | ||||
) | ) |
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2012-2017 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -21,6 +21,7 @@ | |||||
# SOFTWARE. | # SOFTWARE. | ||||
from __future__ import unicode_literals | from __future__ import unicode_literals | ||||
from difflib import unified_diff | |||||
try: | try: | ||||
import unittest2 as unittest | import unittest2 as unittest | ||||
@@ -30,6 +31,8 @@ except ImportError: | |||||
from mwparserfromhell.compat import str | from mwparserfromhell.compat import str | ||||
from mwparserfromhell.nodes import HTMLEntity, Template, Text | from mwparserfromhell.nodes import HTMLEntity, Template, Text | ||||
from mwparserfromhell.nodes.extras import Parameter | from mwparserfromhell.nodes.extras import Parameter | ||||
from mwparserfromhell import parse | |||||
from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext | from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext | ||||
pgens = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=True) | pgens = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=True) | ||||
@@ -287,7 +290,7 @@ class TestTemplate(TreeEqualityTestCase): | |||||
self.assertIsInstance(node12.params[1].value.get(1), HTMLEntity) | self.assertIsInstance(node12.params[1].value.get(1), HTMLEntity) | ||||
self.assertEqual("{{a|\nb = c|\nd = e|\nf = g|\nh = i}}", node13) | self.assertEqual("{{a|\nb = c|\nd = e|\nf = g|\nh = i}}", node13) | ||||
self.assertEqual("{{a\n|b =c\n|d = e|f =g\n|h = i\n|j =k\n}}", node14) | self.assertEqual("{{a\n|b =c\n|d = e|f =g\n|h = i\n|j =k\n}}", node14) | ||||
self.assertEqual("{{a|b = c\n|\nd = e|\nf =g |h =i}}", node15) | |||||
self.assertEqual("{{a|b = c\n|\nd = e|\nf =g |\nh = i}}", node15) | |||||
self.assertEqual("{{a|\nb = c|\nd = e|\nf = g|h=i}}", node16) | self.assertEqual("{{a|\nb = c|\nd = e|\nf = g|h=i}}", node16) | ||||
self.assertEqual("{{a|b|c}}", node17) | self.assertEqual("{{a|b|c}}", node17) | ||||
self.assertEqual("{{a|b|3=c}}", node18) | self.assertEqual("{{a|b|3=c}}", node18) | ||||
@@ -439,5 +442,172 @@ class TestTemplate(TreeEqualityTestCase): | |||||
self.assertEqual("{{foo|a=b|c=d|e=f|a=|a=b}}", node26) | self.assertEqual("{{foo|a=b|c=d|e=f|a=|a=b}}", node26) | ||||
self.assertRaises(ValueError, node27.remove, node28.get(1)) | self.assertRaises(ValueError, node27.remove, node28.get(1)) | ||||
def test_formatting(self): | |||||
"""test realistic param manipulation with complex whitespace formatting | |||||
(assumes that parsing works correctly)""" | |||||
tests = [ | |||||
# https://en.wikipedia.org/w/index.php?title=Lamar_County,_Georgia&oldid=792356004 | |||||
("""{{Infobox U.S. county | |||||
| county = Lamar County | |||||
| state = Georgia | |||||
| seal = | |||||
| founded = 1920 | |||||
| seat wl = Barnesville | |||||
| largest city wl = Barnesville | |||||
| area_total_sq_mi = 186 | |||||
| area_land_sq_mi = 184 | |||||
| area_water_sq_mi = 2.3 | |||||
| area percentage = 1.3% | |||||
| census yr = 2010 | |||||
| pop = 18317 | |||||
| density_sq_mi = 100 | |||||
| time zone = Eastern | |||||
| footnotes = | |||||
| web = www.lamarcountyga.com | |||||
| ex image = Lamar County Georgia Courthouse.jpg | |||||
| ex image cap = Lamar County courthouse in Barnesville | |||||
| district = 3rd | |||||
| named for = [[Lucius Quintus Cincinnatus Lamar II]] | |||||
}}""", | |||||
"""@@ -11,4 +11,4 @@ | |||||
| area percentage = 1.3% | |||||
-| census yr = 2010 | |||||
-| pop = 18317 | |||||
+| census estimate yr = 2016 | |||||
+| pop = 12345<ref>example ref</ref> | |||||
| density_sq_mi = 100"""), | |||||
# https://en.wikipedia.org/w/index.php?title=Rockdale_County,_Georgia&oldid=792359760 | |||||
("""{{Infobox U.S. County| | |||||
county = Rockdale County | | |||||
state = Georgia | | |||||
seal = | | |||||
founded = October 18, 1870 | | |||||
seat wl = Conyers | | |||||
largest city wl = Conyers | | |||||
area_total_sq_mi = 132 | | |||||
area_land_sq_mi = 130 | | |||||
area_water_sq_mi = 2.3 | | |||||
area percentage = 1.7% | | |||||
census yr = 2010| | |||||
pop = 85215 | | |||||
density_sq_mi = 657 | | |||||
web = www.rockdalecounty.org | |||||
| ex image = Rockdale-county-courthouse.jpg | |||||
| ex image cap = Rockdale County Courthouse in Conyers | |||||
| district = 4th | |||||
| time zone= Eastern | |||||
}}""", | |||||
"""@@ -11,4 +11,4 @@ | |||||
area percentage = 1.7% | | |||||
- census yr = 2010| | |||||
- pop = 85215 | | |||||
+ census estimate yr = 2016 | | |||||
+ pop = 12345<ref>example ref</ref> | | |||||
density_sq_mi = 657 |"""), | |||||
# https://en.wikipedia.org/w/index.php?title=Spalding_County,_Georgia&oldid=792360413 | |||||
("""{{Infobox U.S. County| | |||||
| county = Spalding County | | |||||
| state = Georgia | | |||||
| seal = | | |||||
| founded = 1851 | | |||||
| seat wl = Griffin | | |||||
| largest city wl = Griffin | | |||||
| area_total_sq_mi = 200 | | |||||
| area_land_sq_mi = 196 | | |||||
| area_water_sq_mi = 3.1 | | |||||
| area percentage = 1.6% | | |||||
| census yr = 2010| | |||||
| pop = 64073 | | |||||
| density_sq_mi = 326 | | |||||
| web = www.spaldingcounty.com | | |||||
| named for = [[Thomas Spalding]] | |||||
| ex image = Spalding County Courthouse (NE corner).JPG | |||||
| ex image cap = Spalding County Courthouse in Griffin | |||||
| district = 3rd | |||||
| time zone = Eastern | |||||
}}""", | |||||
"""@@ -11,4 +11,4 @@ | |||||
| area percentage = 1.6% | | |||||
-| census yr = 2010| | |||||
-| pop = 64073 | | |||||
+| | |||||
+| census estimate yr = 2016 | pop = 12345<ref>example ref</ref> | | |||||
| density_sq_mi = 326 |"""), | |||||
# https://en.wikipedia.org/w/index.php?title=Clinton_County,_Illinois&oldid=794694648 | |||||
("""{{Infobox U.S. county | |||||
|county = Clinton County | |||||
|state = Illinois | |||||
| ex image = File:Clinton County Courthouse, Carlyle.jpg | |||||
| ex image cap = [[Clinton County Courthouse (Illinois)|Clinton County Courthouse]] | |||||
|seal = | |||||
|founded = 1824 | |||||
|named for = [[DeWitt Clinton]] | |||||
|seat wl= Carlyle | |||||
| largest city wl = Breese | |||||
|time zone=Central | |||||
|area_total_sq_mi = 503 | |||||
|area_land_sq_mi = 474 | |||||
|area_water_sq_mi = 29 | |||||
|area percentage = 5.8% | |||||
|census yr = 2010 | |||||
|pop = 37762 | |||||
|density_sq_mi = 80 | |||||
|web = www.clintonco.illinois.gov | |||||
| district = 15th | |||||
}}""", | |||||
"""@@ -15,4 +15,4 @@ | |||||
|area percentage = 5.8% | |||||
- |census yr = 2010 | |||||
- |pop = 37762 | |||||
+ |census estimate yr = 2016 | |||||
+ |pop = 12345<ref>example ref</ref> | |||||
|density_sq_mi = 80"""), | |||||
# https://en.wikipedia.org/w/index.php?title=Winnebago_County,_Illinois&oldid=789193800 | |||||
("""{{Infobox U.S. county | | |||||
county = Winnebago County | | |||||
state = Illinois | | |||||
seal = Winnebago County il seal.png | | |||||
named for = [[Winnebago (tribe)|Winnebago Tribe]] | | |||||
seat wl= Rockford | | |||||
largest city wl = Rockford| | |||||
area_total_sq_mi = 519 | | |||||
area_land_sq_mi = 513| | |||||
area_water_sq_mi = 5.9 | | |||||
area percentage = 1.1% | | |||||
census yr = 2010| | |||||
pop = 295266 | | |||||
density_sq_mi = 575 | |||||
| web = www.wincoil.us | |||||
| founded year = 1836 | |||||
| founded date = January 16 | |||||
| time zone = Central | |||||
| district = 16th | |||||
| district2 = 17th | |||||
}}""", | |||||
"""@@ -11,4 +11,4 @@ | |||||
area percentage = 1.1% | | |||||
- census yr = 2010| | |||||
- pop = 295266 | | |||||
+ census estimate yr = 2016| | |||||
+ pop = 12345<ref>example ref</ref> | | |||||
density_sq_mi = 575""")] | |||||
for (original, expected) in tests: | |||||
code = parse(original) | |||||
template = code.filter_templates()[0] | |||||
template.add("pop", "12345<ref>example ref</ref>") | |||||
template.add('census estimate yr', "2016", before="pop") | |||||
template.remove("census yr") | |||||
oldlines = original.splitlines(True) | |||||
newlines = str(code).splitlines(True) | |||||
difflines = unified_diff(oldlines, newlines, n=1) | |||||
diff = "".join(list(difflines)[2:]).strip() | |||||
self.assertEqual(expected, diff) | |||||
if __name__ == "__main__": | if __name__ == "__main__": | ||||
unittest.main(verbosity=2) | unittest.main(verbosity=2) |