Browse Source

Some improvements to whitespace recognition; unit tests (#185).

tags/v0.5.1
Ben Kurtovic 6 years ago
parent
commit
7a30e47f76
5 changed files with 235 additions and 6 deletions
  1. +2
    -1
      CHANGELOG
  2. +2
    -1
      docs/changelog.rst
  3. +1
    -1
      mwparserfromhell/__init__.py
  4. +3
    -1
      mwparserfromhell/nodes/template.py
  5. +227
    -2
      tests/test_template.py

+ 2
- 1
CHANGELOG View File

@@ -1,6 +1,7 @@
v0.6 (unreleased):

- ...
- Improved behavior when adding parameters to templates (via Template.add())
with poorly formatted whitespace conventions.

v0.5 (released June 23, 2017):



+ 2
- 1
docs/changelog.rst View File

@@ -7,7 +7,8 @@ v0.6
Unreleased
(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.5...develop>`__):

- ...
- Improved behavior when adding parameters to templates (via
:meth:`.Template.add`) with poorly formatted whitespace conventions.

v0.5
----


+ 1
- 1
mwparserfromhell/__init__.py View File

@@ -27,7 +27,7 @@ outrageously powerful parser for `MediaWiki <http://mediawiki.org>`_ wikicode.
"""

__author__ = "Ben Kurtovic"
__copyright__ = "Copyright (C) 2012, 2013, 2014, 2015, 2016 Ben Kurtovic"
__copyright__ = "Copyright (C) 2012-2017 Ben Kurtovic"
__license__ = "MIT License"
__version__ = "0.6.dev0"
__email__ = "ben.kurtovic@gmail.com"


+ 3
- 1
mwparserfromhell/nodes/template.py View File

@@ -101,7 +101,7 @@ class Template(Node):
values = tuple(theories.values())
best = max(values)
confidence = float(best) / sum(values)
if confidence >= 0.75:
if confidence > 0.5:
return tuple(theories.keys())[values.index(best)]

@staticmethod
@@ -130,6 +130,8 @@ class Template(Node):
before_theories = defaultdict(lambda: 0)
after_theories = defaultdict(lambda: 0)
for param in self.params:
if not param.showkey:
continue
if use_names:
component = str(param.name)
else:


+ 227
- 2
tests/test_template.py View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2017 Ben Kurtovic <ben.kurtovic@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -30,6 +30,8 @@ except ImportError:
from mwparserfromhell.compat import str
from mwparserfromhell.nodes import HTMLEntity, Template, Text
from mwparserfromhell.nodes.extras import Parameter
from mwparserfromhell import parse

from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext

pgens = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=True)
@@ -287,7 +289,7 @@ class TestTemplate(TreeEqualityTestCase):
self.assertIsInstance(node12.params[1].value.get(1), HTMLEntity)
self.assertEqual("{{a|\nb = c|\nd = e|\nf = g|\nh = i}}", node13)
self.assertEqual("{{a\n|b =c\n|d = e|f =g\n|h = i\n|j =k\n}}", node14)
self.assertEqual("{{a|b = c\n|\nd = e|\nf =g |h =i}}", node15)
self.assertEqual("{{a|b = c\n|\nd = e|\nf =g |\nh = i}}", node15)
self.assertEqual("{{a|\nb = c|\nd = e|\nf = g|h=i}}", node16)
self.assertEqual("{{a|b|c}}", node17)
self.assertEqual("{{a|b|3=c}}", node18)
@@ -439,5 +441,228 @@ class TestTemplate(TreeEqualityTestCase):
self.assertEqual("{{foo|a=b|c=d|e=f|a=|a=b}}", node26)
self.assertRaises(ValueError, node27.remove, node28.get(1))

def test_formatting(self):
"""test realistic param manipulation with complex whitespace formatting
(assumes that parsing works correctly)

Each entry in ``tests`` is an ``(original, expected)`` pair of infobox
wikitext. For every pair, the first template found in ``original`` has
its ``pop`` parameter set, a ``census estimate yr`` parameter added
before ``pop``, and the ``census yr`` parameter removed; the rendered
text must then equal ``expected`` exactly.
"""
tests = [
# Fixture style: one parameter per line, each line starting with a pipe.
# https://en.wikipedia.org/w/index.php?title=Lamar_County,_Georgia&oldid=792356004
("""{{Infobox U.S. county
| county = Lamar County
| state = Georgia
| seal =
| founded = 1920
| seat wl = Barnesville
| largest city wl = Barnesville
| area_total_sq_mi = 186
| area_land_sq_mi = 184
| area_water_sq_mi = 2.3
| area percentage = 1.3%
| census yr = 2010
| pop = 18317
| density_sq_mi = 100
| time zone = Eastern
| footnotes =
| web = www.lamarcountyga.com
| ex image = Lamar County Georgia Courthouse.jpg
| ex image cap = Lamar County courthouse in Barnesville
| district = 3rd
| named for = [[Lucius Quintus Cincinnatus Lamar II]]
}}""", """{{Infobox U.S. county
| county = Lamar County
| state = Georgia
| seal =
| founded = 1920
| seat wl = Barnesville
| largest city wl = Barnesville
| area_total_sq_mi = 186
| area_land_sq_mi = 184
| area_water_sq_mi = 2.3
| area percentage = 1.3%
| census estimate yr = 2016
| pop = 12345<ref>example ref</ref>
| density_sq_mi = 100
| time zone = Eastern
| footnotes =
| web = www.lamarcountyga.com
| ex image = Lamar County Georgia Courthouse.jpg
| ex image cap = Lamar County courthouse in Barnesville
| district = 3rd
| named for = [[Lucius Quintus Cincinnatus Lamar II]]
}}"""),
# Fixture style: pipes trail each line at first, then switch to leading
# pipes for the last few parameters.
# https://en.wikipedia.org/w/index.php?title=Rockdale_County,_Georgia&oldid=792359760
("""{{Infobox U.S. County|
county = Rockdale County |
state = Georgia |
seal = |
founded = October 18, 1870 |
seat wl = Conyers |
largest city wl = Conyers |
area_total_sq_mi = 132 |
area_land_sq_mi = 130 |
area_water_sq_mi = 2.3 |
area percentage = 1.7% |
census yr = 2010|
pop = 85215 |
density_sq_mi = 657 |
web = www.rockdalecounty.org
| ex image = Rockdale-county-courthouse.jpg
| ex image cap = Rockdale County Courthouse in Conyers
| district = 4th
| time zone= Eastern
}}""", """{{Infobox U.S. County|
county = Rockdale County |
state = Georgia |
seal = |
founded = October 18, 1870 |
seat wl = Conyers |
largest city wl = Conyers |
area_total_sq_mi = 132 |
area_land_sq_mi = 130 |
area_water_sq_mi = 2.3 |
area percentage = 1.7% |
census estimate yr = 2016 |
pop = 12345<ref>example ref</ref> |
density_sq_mi = 657 |
web = www.rockdalecounty.org
| ex image = Rockdale-county-courthouse.jpg
| ex image cap = Rockdale County Courthouse in Conyers
| district = 4th
| time zone= Eastern
}}"""),
# Fixture style: lines carry both a leading and a trailing pipe.
# NOTE(review): the expected text below places the new parameter and
# `pop` on one line after a bare `|` line; this looks like a record of
# the current output rather than ideal formatting -- confirm.
# https://en.wikipedia.org/w/index.php?title=Spalding_County,_Georgia&oldid=792360413
("""{{Infobox U.S. County|
| county = Spalding County |
| state = Georgia |
| seal = |
| founded = 1851 |
| seat wl = Griffin |
| largest city wl = Griffin |
| area_total_sq_mi = 200 |
| area_land_sq_mi = 196 |
| area_water_sq_mi = 3.1 |
| area percentage = 1.6% |
| census yr = 2010|
| pop = 64073 |
| density_sq_mi = 326 |
| web = www.spaldingcounty.com |
| named for = [[Thomas Spalding]]
| ex image = Spalding County Courthouse (NE corner).JPG
| ex image cap = Spalding County Courthouse in Griffin
| district = 3rd
| time zone = Eastern
}}""", """{{Infobox U.S. County|
| county = Spalding County |
| state = Georgia |
| seal = |
| founded = 1851 |
| seat wl = Griffin |
| largest city wl = Griffin |
| area_total_sq_mi = 200 |
| area_land_sq_mi = 196 |
| area_water_sq_mi = 3.1 |
| area percentage = 1.6% |
|
| census estimate yr = 2016 | pop = 12345<ref>example ref</ref> |
| density_sq_mi = 326 |
| web = www.spaldingcounty.com |
| named for = [[Thomas Spalding]]
| ex image = Spalding County Courthouse (NE corner).JPG
| ex image cap = Spalding County Courthouse in Griffin
| district = 3rd
| time zone = Eastern
}}"""),
# Fixture style: leading pipes with inconsistent spacing around the pipe
# and the equals sign.
# https://en.wikipedia.org/w/index.php?title=Clinton_County,_Illinois&oldid=794694648
("""{{Infobox U.S. county
|county = Clinton County
|state = Illinois
| ex image = File:Clinton County Courthouse, Carlyle.jpg
| ex image cap = [[Clinton County Courthouse (Illinois)|Clinton County Courthouse]]
|seal =
|founded = 1824
|named for = [[DeWitt Clinton]]
|seat wl= Carlyle
| largest city wl = Breese
|time zone=Central
|area_total_sq_mi = 503
|area_land_sq_mi = 474
|area_water_sq_mi = 29
|area percentage = 5.8%
|census yr = 2010
|pop = 37762
|density_sq_mi = 80
|web = www.clintonco.illinois.gov
| district = 15th
}}""", """{{Infobox U.S. county
|county = Clinton County
|state = Illinois
| ex image = File:Clinton County Courthouse, Carlyle.jpg
| ex image cap = [[Clinton County Courthouse (Illinois)|Clinton County Courthouse]]
|seal =
|founded = 1824
|named for = [[DeWitt Clinton]]
|seat wl= Carlyle
| largest city wl = Breese
|time zone=Central
|area_total_sq_mi = 503
|area_land_sq_mi = 474
|area_water_sq_mi = 29
|area percentage = 5.8%
|census estimate yr = 2016
|pop = 12345<ref>example ref</ref>
|density_sq_mi = 80
|web = www.clintonco.illinois.gov
| district = 15th
}}"""),
# Fixture style: trailing pipes with uneven spacing, followed by a run
# of leading-pipe parameters.
# https://en.wikipedia.org/w/index.php?title=Winnebago_County,_Illinois&oldid=789193800
("""{{Infobox U.S. county |
county = Winnebago County |
state = Illinois |
seal = Winnebago County il seal.png |
named for = [[Winnebago (tribe)|Winnebago Tribe]] |
seat wl= Rockford |
largest city wl = Rockford|
area_total_sq_mi = 519 |
area_land_sq_mi = 513|
area_water_sq_mi = 5.9 |
area percentage = 1.1% |
census yr = 2010|
pop = 295266 |
density_sq_mi = 575
| web = www.wincoil.us
| founded year = 1836
| founded date = January 16
| time zone = Central
| district = 16th
| district2 = 17th
}}""", """{{Infobox U.S. county |
county = Winnebago County |
state = Illinois |
seal = Winnebago County il seal.png |
named for = [[Winnebago (tribe)|Winnebago Tribe]] |
seat wl= Rockford |
largest city wl = Rockford|
area_total_sq_mi = 519 |
area_land_sq_mi = 513|
area_water_sq_mi = 5.9 |
area percentage = 1.1% |
census estimate yr = 2016|
pop = 12345<ref>example ref</ref> |
density_sq_mi = 575
| web = www.wincoil.us
| founded year = 1836
| founded date = January 16
| time zone = Central
| district = 16th
| district2 = 17th
}}""")]

# Apply the same three edits to every fixture and compare the complete
# rendered text, so any whitespace difference fails the test.
for (original, expected) in tests:
code = parse(original)
# Only the first template found in the parsed text is edited.
template = code.filter_templates()[0]
# `pop` already exists in every fixture; the expected outputs show its
# value replaced rather than a second `pop` parameter being appended.
template.add("pop", "12345<ref>example ref</ref>")
# The expected outputs show the new parameter placed directly ahead of
# `pop`, in the slot that `census yr` occupied.
template.add('census estimate yr', "2016", before="pop")
template.remove("census yr")
# Compare against the whole document (`code`), not just the template.
self.assertEqual(expected, str(code))

# Run this module's tests with verbose output when it is executed directly
# as a script.
if __name__ == "__main__":
unittest.main(verbosity=2)

Loading…
Cancel
Save