Browse Source

Merge develop into master (release/0.5.1)

undefined
Ben Kurtovic 6 years ago
parent
commit
c8df09469e
14 changed files with 265 additions and 30 deletions
  1. +7
    -0
      CHANGELOG
  2. +1
    -1
      LICENSE
  3. +1
    -1
      appveyor.yml
  4. +13
    -0
      docs/changelog.rst
  5. +1
    -1
      docs/conf.py
  6. +3
    -3
      mwparserfromhell/__init__.py
  7. +3
    -1
      mwparserfromhell/nodes/template.py
  8. +12
    -4
      mwparserfromhell/parser/ctokenizer/tok_parse.c
  9. +19
    -9
      mwparserfromhell/parser/ctokenizer/tok_support.c
  10. +2
    -1
      mwparserfromhell/parser/ctokenizer/tok_support.h
  11. +27
    -6
      mwparserfromhell/parser/tokenizer.py
  12. +2
    -0
      scripts/release.sh
  13. +2
    -1
      setup.py
  14. +172
    -2
      tests/test_template.py

+ 7
- 0
CHANGELOG View File

@@ -1,3 +1,10 @@
v0.5.1 (released March 03, 2018):

- Improved behavior when adding parameters to templates (via Template.add())
with poorly formatted whitespace conventions. (#185)
- Fixed the parser getting stuck in deeply nested HTML tags with unclosed,
quoted attributes. (#190)

v0.5 (released June 23, 2017):


- Added Wikicode.contains() to determine whether a Node or Wikicode object is


+ 1
- 1
LICENSE View File

@@ -1,4 +1,4 @@
Copyright (C) 2012-2017 Ben Kurtovic <ben.kurtovic@gmail.com>
Copyright (C) 2012-2018 Ben Kurtovic <ben.kurtovic@gmail.com>


Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal


+ 1
- 1
appveyor.yml View File

@@ -1,6 +1,6 @@
# This config file is used by appveyor.com to build Windows release binaries


version: 0.5-b{build}
version: 0.5.1-b{build}


branches: branches:
only: only:


+ 13
- 0
docs/changelog.rst View File

@@ -1,6 +1,19 @@
Changelog
=========


v0.5.1
------

`Released March 03, 2018 <https://github.com/earwig/mwparserfromhell/tree/v0.5.1>`_
(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.5...v0.5.1>`__):

- Improved behavior when adding parameters to templates (via
:meth:`.Template.add`) with poorly formatted whitespace conventions.
(`#185 <https://github.com/earwig/mwparserfromhell/issues/185>`_)
- Fixed the parser getting stuck in deeply nested HTML tags with unclosed,
quoted attributes.
(`#190 <https://github.com/earwig/mwparserfromhell/issues/190>`_)

v0.5
----




+ 1
- 1
docs/conf.py View File

@@ -42,7 +42,7 @@ master_doc = 'index'


# General information about the project.
project = u'mwparserfromhell' project = u'mwparserfromhell'
copyright = u'2012, 2013, 2014, 2015, 2016, 2017 Ben Kurtovic'
copyright = u'2012–2018 Ben Kurtovic'


# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the


+ 3
- 3
mwparserfromhell/__init__.py View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# Copyright (C) 2012-2017 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2018 Ben Kurtovic <ben.kurtovic@gmail.com>
# #
# Permission is hereby granted, free of charge, to any person obtaining a copy # Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal # of this software and associated documentation files (the "Software"), to deal
@@ -27,9 +27,9 @@ outrageously powerful parser for `MediaWiki <http://mediawiki.org>`_ wikicode.
""" """


__author__ = "Ben Kurtovic" __author__ = "Ben Kurtovic"
__copyright__ = "Copyright (C) 2012, 2013, 2014, 2015, 2016 Ben Kurtovic"
__copyright__ = "Copyright (C) 2012-2018 Ben Kurtovic"
__license__ = "MIT License" __license__ = "MIT License"
__version__ = "0.5"
__version__ = "0.5.1"
__email__ = "ben.kurtovic@gmail.com" __email__ = "ben.kurtovic@gmail.com"


from . import (compat, definitions, nodes, parser, smart_list, string_mixin, from . import (compat, definitions, nodes, parser, smart_list, string_mixin,


+ 3
- 1
mwparserfromhell/nodes/template.py View File

@@ -101,7 +101,7 @@ class Template(Node):
values = tuple(theories.values()) values = tuple(theories.values())
best = max(values) best = max(values)
confidence = float(best) / sum(values) confidence = float(best) / sum(values)
if confidence >= 0.75:
if confidence > 0.5:
return tuple(theories.keys())[values.index(best)] return tuple(theories.keys())[values.index(best)]


@staticmethod @staticmethod
@@ -130,6 +130,8 @@ class Template(Node):
before_theories = defaultdict(lambda: 0) before_theories = defaultdict(lambda: 0)
after_theories = defaultdict(lambda: 0) after_theories = defaultdict(lambda: 0)
for param in self.params: for param in self.params:
if not param.showkey:
continue
if use_names: if use_names:
component = str(param.name) component = str(param.name)
else: else:


+ 12
- 4
mwparserfromhell/parser/ctokenizer/tok_parse.c View File

@@ -1,5 +1,5 @@
/* /*
Copyright (C) 2012-2017 Ben Kurtovic <ben.kurtovic@gmail.com>
Copyright (C) 2012-2018 Ben Kurtovic <ben.kurtovic@gmail.com>


Permission is hereby granted, free of charge, to any person obtaining a copy of Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in this software and associated documentation files (the "Software"), to deal in
@@ -722,7 +722,6 @@ Tokenizer_remove_uri_scheme_from_textbuffer(Tokenizer* self, PyObject* link)
*/ */
static int Tokenizer_parse_external_link(Tokenizer* self, int brackets) static int Tokenizer_parse_external_link(Tokenizer* self, int brackets)
{ {
#define INVALID_CONTEXT self->topstack->context & AGG_NO_EXT_LINKS
#define NOT_A_LINK \ #define NOT_A_LINK \
if (!brackets && self->topstack->context & LC_DLTERM) \ if (!brackets && self->topstack->context & LC_DLTERM) \
return Tokenizer_handle_dl_term(self); \ return Tokenizer_handle_dl_term(self); \
@@ -732,7 +731,8 @@ static int Tokenizer_parse_external_link(Tokenizer* self, int brackets)
PyObject *link, *kwargs; PyObject *link, *kwargs;
Textbuffer *extra; Textbuffer *extra;


if (INVALID_CONTEXT || !(Tokenizer_CAN_RECURSE(self))) {
if (self->topstack->context & AGG_NO_EXT_LINKS ||
!(Tokenizer_CAN_RECURSE(self))) {
NOT_A_LINK; NOT_A_LINK;
} }
extra = Textbuffer_new(&self->text); extra = Textbuffer_new(&self->text);
@@ -1280,6 +1280,7 @@ static int Tokenizer_handle_tag_data(
else if (data->context & TAG_NOTE_SPACE) { else if (data->context & TAG_NOTE_SPACE) {
if (data->context & TAG_QUOTED) { if (data->context & TAG_QUOTED) {
data->context = TAG_ATTR_VALUE; data->context = TAG_ATTR_VALUE;
Tokenizer_memoize_bad_route(self);
trash = Tokenizer_pop(self); trash = Tokenizer_pop(self);
Py_XDECREF(trash); Py_XDECREF(trash);
self->head = data->reset - 1; // Will be auto-incremented self->head = data->reset - 1; // Will be auto-incremented
@@ -1317,7 +1318,12 @@ static int Tokenizer_handle_tag_data(
data->context |= TAG_QUOTED; data->context |= TAG_QUOTED;
data->quoter = chunk; data->quoter = chunk;
data->reset = self->head; data->reset = self->head;
if (Tokenizer_push(self, self->topstack->context))
if (Tokenizer_check_route(self, self->topstack->context) < 0) {
RESET_ROUTE();
data->context = TAG_ATTR_VALUE;
self->head--;
}
else if (Tokenizer_push(self, self->topstack->context))
return -1; return -1;
return 0; return 0;
} }
@@ -1613,6 +1619,7 @@ static PyObject* Tokenizer_really_parse_tag(Tokenizer* self)
if (data->context & TAG_QUOTED) { if (data->context & TAG_QUOTED) {
// Unclosed attribute quote: reset, don't die // Unclosed attribute quote: reset, don't die
data->context = TAG_ATTR_VALUE; data->context = TAG_ATTR_VALUE;
Tokenizer_memoize_bad_route(self);
trash = Tokenizer_pop(self); trash = Tokenizer_pop(self);
Py_XDECREF(trash); Py_XDECREF(trash);
self->head = data->reset; self->head = data->reset;
@@ -2185,6 +2192,7 @@ static PyObject* Tokenizer_handle_table_style(Tokenizer* self, Unicode end_token
if (data->context & TAG_QUOTED) { if (data->context & TAG_QUOTED) {
// Unclosed attribute quote: reset, don't die // Unclosed attribute quote: reset, don't die
data->context = TAG_ATTR_VALUE; data->context = TAG_ATTR_VALUE;
Tokenizer_memoize_bad_route(self);
trash = Tokenizer_pop(self); trash = Tokenizer_pop(self);
Py_XDECREF(trash); Py_XDECREF(trash);
self->head = data->reset; self->head = data->reset;


+ 19
- 9
mwparserfromhell/parser/ctokenizer/tok_support.c View File

@@ -1,5 +1,5 @@
/* /*
Copyright (C) 2012-2017 Ben Kurtovic <ben.kurtovic@gmail.com>
Copyright (C) 2012-2018 Ben Kurtovic <ben.kurtovic@gmail.com>


Permission is hereby granted, free of charge, to any person obtaining a copy of Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in this software and associated documentation files (the "Software"), to deal in
@@ -147,6 +147,22 @@ static int compare_nodes(
} }


/* /*
Remember that the current route (head + context at push) is invalid.

This will be noticed when calling Tokenizer_check_route with the same head
and context, and the route will be failed immediately.
*/
void Tokenizer_memoize_bad_route(Tokenizer *self)
{
route_tree_node *node = malloc(sizeof(route_tree_node));
if (node) {
node->id = self->topstack->ident;
if (avl_tree_insert(&self->bad_routes, &node->node, compare_nodes))
free(node);
}
}

/*
Fail the current tokenization route. Discards the current Fail the current tokenization route. Discards the current
stack/context/textbuffer and sets the BAD_ROUTE flag. Also records the stack/context/textbuffer and sets the BAD_ROUTE flag. Also records the
ident of the failed stack so future parsing attempts down this route can be ident of the failed stack so future parsing attempts down this route can be
@@ -157,13 +173,7 @@ void* Tokenizer_fail_route(Tokenizer* self)
uint64_t context = self->topstack->context; uint64_t context = self->topstack->context;
PyObject* stack; PyObject* stack;


route_tree_node *node = malloc(sizeof(route_tree_node));
if (node) {
node->id = self->topstack->ident;
if (avl_tree_insert(&self->bad_routes, &node->node, compare_nodes))
free(node);
}

Tokenizer_memoize_bad_route(self);
stack = Tokenizer_pop(self); stack = Tokenizer_pop(self);
Py_XDECREF(stack); Py_XDECREF(stack);
FAIL_ROUTE(context); FAIL_ROUTE(context);
@@ -173,7 +183,7 @@ void* Tokenizer_fail_route(Tokenizer* self)
/* /*
Check if pushing a new route here with the given context would definitely Check if pushing a new route here with the given context would definitely
fail, based on a previous call to Tokenizer_fail_route() with the same fail, based on a previous call to Tokenizer_fail_route() with the same
stack.
stack. (Or any other call to Tokenizer_memoize_bad_route().)


Return 0 if safe and -1 if unsafe. The BAD_ROUTE flag will be set in the Return 0 if safe and -1 if unsafe. The BAD_ROUTE flag will be set in the
latter case. latter case.


+ 2
- 1
mwparserfromhell/parser/ctokenizer/tok_support.h View File

@@ -1,5 +1,5 @@
/* /*
Copyright (C) 2012-2017 Ben Kurtovic <ben.kurtovic@gmail.com>
Copyright (C) 2012-2018 Ben Kurtovic <ben.kurtovic@gmail.com>


Permission is hereby granted, free of charge, to any person obtaining a copy of Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in this software and associated documentation files (the "Software"), to deal in
@@ -31,6 +31,7 @@ int Tokenizer_push_textbuffer(Tokenizer*);
void Tokenizer_delete_top_of_stack(Tokenizer*); void Tokenizer_delete_top_of_stack(Tokenizer*);
PyObject* Tokenizer_pop(Tokenizer*); PyObject* Tokenizer_pop(Tokenizer*);
PyObject* Tokenizer_pop_keeping_context(Tokenizer*); PyObject* Tokenizer_pop_keeping_context(Tokenizer*);
void Tokenizer_memoize_bad_route(Tokenizer*);
void* Tokenizer_fail_route(Tokenizer*); void* Tokenizer_fail_route(Tokenizer*);
int Tokenizer_check_route(Tokenizer*, uint64_t); int Tokenizer_check_route(Tokenizer*, uint64_t);
void Tokenizer_free_bad_route_tree(Tokenizer*); void Tokenizer_free_bad_route_tree(Tokenizer*);


+ 27
- 6
mwparserfromhell/parser/tokenizer.py View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# Copyright (C) 2012-2017 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2018 Ben Kurtovic <ben.kurtovic@gmail.com>
# #
# Permission is hereby granted, free of charge, to any person obtaining a copy # Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal # of this software and associated documentation files (the "Software"), to deal
@@ -144,6 +144,14 @@ class Tokenizer(object):
"""Return whether or not our max recursion depth has been exceeded.""" """Return whether or not our max recursion depth has been exceeded."""
return self._depth < self.MAX_DEPTH return self._depth < self.MAX_DEPTH


def _memoize_bad_route(self):
"""Remember that the current route (head + context at push) is invalid.

This will be noticed when calling _push with the same head and context,
and the route will be failed immediately.
"""
self._bad_routes.add(self._stack_ident)

def _fail_route(self): def _fail_route(self):
"""Fail the current tokenization route. """Fail the current tokenization route.


@@ -151,7 +159,7 @@ class Tokenizer(object):
:exc:`.BadRoute`. :exc:`.BadRoute`.
""" """
context = self._context context = self._context
self._bad_routes.add(self._stack_ident)
self._memoize_bad_route()
self._pop() self._pop()
raise BadRoute(context) raise BadRoute(context)


@@ -506,12 +514,16 @@ class Tokenizer(object):


def _parse_external_link(self, brackets): def _parse_external_link(self, brackets):
"""Parse an external link at the head of the wikicode string.""" """Parse an external link at the head of the wikicode string."""
if self._context & contexts.NO_EXT_LINKS or not self._can_recurse():
if not brackets and self._context & contexts.DL_TERM:
self._handle_dl_term()
else:
self._emit_text(self._read())
return

reset = self._head reset = self._head
self._head += 1 self._head += 1
try: try:
bad_context = self._context & contexts.NO_EXT_LINKS
if bad_context or not self._can_recurse():
raise BadRoute()
link, extra, delta = self._really_parse_external_link(brackets) link, extra, delta = self._really_parse_external_link(brackets)
except BadRoute: except BadRoute:
self._head = reset self._head = reset
@@ -719,6 +731,7 @@ class Tokenizer(object):
elif data.context & data.CX_NOTE_SPACE: elif data.context & data.CX_NOTE_SPACE:
if data.context & data.CX_QUOTED: if data.context & data.CX_QUOTED:
data.context = data.CX_ATTR_VALUE data.context = data.CX_ATTR_VALUE
self._memoize_bad_route()
self._pop() self._pop()
self._head = data.reset - 1 # Will be auto-incremented self._head = data.reset - 1 # Will be auto-incremented
return # Break early return # Break early
@@ -743,7 +756,13 @@ class Tokenizer(object):
data.context |= data.CX_QUOTED data.context |= data.CX_QUOTED
data.quoter = chunk data.quoter = chunk
data.reset = self._head data.reset = self._head
self._push(self._context)
try:
self._push(self._context)
except BadRoute:
# Already failed to parse this as a quoted string
data.context = data.CX_ATTR_VALUE
self._head -= 1
return
continue continue
elif data.context & data.CX_QUOTED: elif data.context & data.CX_QUOTED:
if chunk == data.quoter and not escaped: if chunk == data.quoter and not escaped:
@@ -845,6 +864,7 @@ class Tokenizer(object):
if data.context & data.CX_QUOTED: if data.context & data.CX_QUOTED:
# Unclosed attribute quote: reset, don't die # Unclosed attribute quote: reset, don't die
data.context = data.CX_ATTR_VALUE data.context = data.CX_ATTR_VALUE
self._memoize_bad_route()
self._pop() self._pop()
self._head = data.reset self._head = data.reset
continue continue
@@ -1084,6 +1104,7 @@ class Tokenizer(object):
if data.context & data.CX_QUOTED: if data.context & data.CX_QUOTED:
# Unclosed attribute quote: reset, don't die # Unclosed attribute quote: reset, don't die
data.context = data.CX_ATTR_VALUE data.context = data.CX_ATTR_VALUE
self._memoize_bad_route()
self._pop() self._pop()
self._head = data.reset self._head = data.reset
continue continue


+ 2
- 0
scripts/release.sh View File

@@ -1,5 +1,7 @@
#! /usr/bin/env bash #! /usr/bin/env bash


set -euo pipefail

if [[ -z "$1" ]]; then if [[ -z "$1" ]]; then
echo "usage: $0 1.2.3" echo "usage: $0 1.2.3"
exit 1 exit 1


+ 2
- 1
setup.py View File

@@ -1,7 +1,7 @@
#! /usr/bin/env python #! /usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2018 Ben Kurtovic <ben.kurtovic@gmail.com>
# #
# Permission is hereby granted, free of charge, to any person obtaining a copy # Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal # of this software and associated documentation files (the "Software"), to deal
@@ -107,6 +107,7 @@ setup(
"Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.4",
"Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.5",
"Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Topic :: Text Processing :: Markup" "Topic :: Text Processing :: Markup"
], ],
) )

+ 172
- 2
tests/test_template.py View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2017 Ben Kurtovic <ben.kurtovic@gmail.com>
# #
# Permission is hereby granted, free of charge, to any person obtaining a copy # Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal # of this software and associated documentation files (the "Software"), to deal
@@ -21,6 +21,7 @@
# SOFTWARE. # SOFTWARE.


from __future__ import unicode_literals from __future__ import unicode_literals
from difflib import unified_diff


try: try:
import unittest2 as unittest import unittest2 as unittest
@@ -30,6 +31,8 @@ except ImportError:
from mwparserfromhell.compat import str from mwparserfromhell.compat import str
from mwparserfromhell.nodes import HTMLEntity, Template, Text from mwparserfromhell.nodes import HTMLEntity, Template, Text
from mwparserfromhell.nodes.extras import Parameter from mwparserfromhell.nodes.extras import Parameter
from mwparserfromhell import parse

from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext


pgens = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=True) pgens = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=True)
@@ -287,7 +290,7 @@ class TestTemplate(TreeEqualityTestCase):
self.assertIsInstance(node12.params[1].value.get(1), HTMLEntity) self.assertIsInstance(node12.params[1].value.get(1), HTMLEntity)
self.assertEqual("{{a|\nb = c|\nd = e|\nf = g|\nh = i}}", node13) self.assertEqual("{{a|\nb = c|\nd = e|\nf = g|\nh = i}}", node13)
self.assertEqual("{{a\n|b =c\n|d = e|f =g\n|h = i\n|j =k\n}}", node14) self.assertEqual("{{a\n|b =c\n|d = e|f =g\n|h = i\n|j =k\n}}", node14)
self.assertEqual("{{a|b = c\n|\nd = e|\nf =g |h =i}}", node15)
self.assertEqual("{{a|b = c\n|\nd = e|\nf =g |\nh = i}}", node15)
self.assertEqual("{{a|\nb = c|\nd = e|\nf = g|h=i}}", node16) self.assertEqual("{{a|\nb = c|\nd = e|\nf = g|h=i}}", node16)
self.assertEqual("{{a|b|c}}", node17) self.assertEqual("{{a|b|c}}", node17)
self.assertEqual("{{a|b|3=c}}", node18) self.assertEqual("{{a|b|3=c}}", node18)
@@ -439,5 +442,172 @@ class TestTemplate(TreeEqualityTestCase):
self.assertEqual("{{foo|a=b|c=d|e=f|a=|a=b}}", node26) self.assertEqual("{{foo|a=b|c=d|e=f|a=|a=b}}", node26)
self.assertRaises(ValueError, node27.remove, node28.get(1)) self.assertRaises(ValueError, node27.remove, node28.get(1))


def test_formatting(self):
"""test realistic param manipulation with complex whitespace formatting
(assumes that parsing works correctly)"""
tests = [
# https://en.wikipedia.org/w/index.php?title=Lamar_County,_Georgia&oldid=792356004
("""{{Infobox U.S. county
| county = Lamar County
| state = Georgia
| seal =
| founded = 1920
| seat wl = Barnesville
| largest city wl = Barnesville
| area_total_sq_mi = 186
| area_land_sq_mi = 184
| area_water_sq_mi = 2.3
| area percentage = 1.3%
| census yr = 2010
| pop = 18317
| density_sq_mi = 100
| time zone = Eastern
| footnotes =
| web = www.lamarcountyga.com
| ex image = Lamar County Georgia Courthouse.jpg
| ex image cap = Lamar County courthouse in Barnesville
| district = 3rd
| named for = [[Lucius Quintus Cincinnatus Lamar II]]
}}""",
"""@@ -11,4 +11,4 @@
| area percentage = 1.3%
-| census yr = 2010
-| pop = 18317
+| census estimate yr = 2016
+| pop = 12345<ref>example ref</ref>
| density_sq_mi = 100"""),

# https://en.wikipedia.org/w/index.php?title=Rockdale_County,_Georgia&oldid=792359760
("""{{Infobox U.S. County|
county = Rockdale County |
state = Georgia |
seal = |
founded = October 18, 1870 |
seat wl = Conyers |
largest city wl = Conyers |
area_total_sq_mi = 132 |
area_land_sq_mi = 130 |
area_water_sq_mi = 2.3 |
area percentage = 1.7% |
census yr = 2010|
pop = 85215 |
density_sq_mi = 657 |
web = www.rockdalecounty.org
| ex image = Rockdale-county-courthouse.jpg
| ex image cap = Rockdale County Courthouse in Conyers
| district = 4th
| time zone= Eastern
}}""",
"""@@ -11,4 +11,4 @@
area percentage = 1.7% |
- census yr = 2010|
- pop = 85215 |
+ census estimate yr = 2016 |
+ pop = 12345<ref>example ref</ref> |
density_sq_mi = 657 |"""),

# https://en.wikipedia.org/w/index.php?title=Spalding_County,_Georgia&oldid=792360413
("""{{Infobox U.S. County|
| county = Spalding County |
| state = Georgia |
| seal = |
| founded = 1851 |
| seat wl = Griffin |
| largest city wl = Griffin |
| area_total_sq_mi = 200 |
| area_land_sq_mi = 196 |
| area_water_sq_mi = 3.1 |
| area percentage = 1.6% |
| census yr = 2010|
| pop = 64073 |
| density_sq_mi = 326 |
| web = www.spaldingcounty.com |
| named for = [[Thomas Spalding]]
| ex image = Spalding County Courthouse (NE corner).JPG
| ex image cap = Spalding County Courthouse in Griffin
| district = 3rd
| time zone = Eastern
}}""",
"""@@ -11,4 +11,4 @@
| area percentage = 1.6% |
-| census yr = 2010|
-| pop = 64073 |
+|
+| census estimate yr = 2016 | pop = 12345<ref>example ref</ref> |
| density_sq_mi = 326 |"""),

# https://en.wikipedia.org/w/index.php?title=Clinton_County,_Illinois&oldid=794694648
("""{{Infobox U.S. county
|county = Clinton County
|state = Illinois
| ex image = File:Clinton County Courthouse, Carlyle.jpg
| ex image cap = [[Clinton County Courthouse (Illinois)|Clinton County Courthouse]]
|seal =
|founded = 1824
|named for = [[DeWitt Clinton]]
|seat wl= Carlyle
| largest city wl = Breese
|time zone=Central
|area_total_sq_mi = 503
|area_land_sq_mi = 474
|area_water_sq_mi = 29
|area percentage = 5.8%
|census yr = 2010
|pop = 37762
|density_sq_mi = 80
|web = www.clintonco.illinois.gov
| district = 15th
}}""",
"""@@ -15,4 +15,4 @@
|area percentage = 5.8%
- |census yr = 2010
- |pop = 37762
+ |census estimate yr = 2016
+ |pop = 12345<ref>example ref</ref>
|density_sq_mi = 80"""),

# https://en.wikipedia.org/w/index.php?title=Winnebago_County,_Illinois&oldid=789193800
("""{{Infobox U.S. county |
county = Winnebago County |
state = Illinois |
seal = Winnebago County il seal.png |
named for = [[Winnebago (tribe)|Winnebago Tribe]] |
seat wl= Rockford |
largest city wl = Rockford|
area_total_sq_mi = 519 |
area_land_sq_mi = 513|
area_water_sq_mi = 5.9 |
area percentage = 1.1% |
census yr = 2010|
pop = 295266 |
density_sq_mi = 575
| web = www.wincoil.us
| founded year = 1836
| founded date = January 16
| time zone = Central
| district = 16th
| district2 = 17th
}}""",
"""@@ -11,4 +11,4 @@
area percentage = 1.1% |
- census yr = 2010|
- pop = 295266 |
+ census estimate yr = 2016|
+ pop = 12345<ref>example ref</ref> |
density_sq_mi = 575""")]

for (original, expected) in tests:
code = parse(original)
template = code.filter_templates()[0]
template.add("pop", "12345<ref>example ref</ref>")
template.add('census estimate yr', "2016", before="pop")
template.remove("census yr")

oldlines = original.splitlines(True)
newlines = str(code).splitlines(True)
difflines = unified_diff(oldlines, newlines, n=1)
diff = "".join(list(difflines)[2:]).strip()
self.assertEqual(expected, diff)

if __name__ == "__main__": if __name__ == "__main__":
unittest.main(verbosity=2) unittest.main(verbosity=2)

Loading…
Cancel
Save