Browse Source

Some improvements to whitespace recognition; unit tests (#185).

tags/v0.5.1
Ben Kurtovic 7 years ago
parent
commit
7a30e47f76
5 changed files with 235 additions and 6 deletions
  1. +2
    -1
      CHANGELOG
  2. +2
    -1
      docs/changelog.rst
  3. +1
    -1
      mwparserfromhell/__init__.py
  4. +3
    -1
      mwparserfromhell/nodes/template.py
  5. +227
    -2
      tests/test_template.py

+ 2
- 1
CHANGELOG View File

@@ -1,6 +1,7 @@
v0.6 (unreleased): v0.6 (unreleased):


- ...
- Improved behavior when adding parameters to templates (via Template.add())
with poorly formatted whitespace conventions.


v0.5 (released June 23, 2017): v0.5 (released June 23, 2017):




+ 2
- 1
docs/changelog.rst View File

@@ -7,7 +7,8 @@ v0.6
Unreleased Unreleased
(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.5...develop>`__): (`changes <https://github.com/earwig/mwparserfromhell/compare/v0.5...develop>`__):


- ...
- Improved behavior when adding parameters to templates (via
:meth:`.Template.add`) with poorly formatted whitespace conventions.


v0.5 v0.5
---- ----


+ 1
- 1
mwparserfromhell/__init__.py View File

@@ -27,7 +27,7 @@ outrageously powerful parser for `MediaWiki <http://mediawiki.org>`_ wikicode.
""" """


__author__ = "Ben Kurtovic" __author__ = "Ben Kurtovic"
__copyright__ = "Copyright (C) 2012, 2013, 2014, 2015, 2016 Ben Kurtovic"
__copyright__ = "Copyright (C) 2012-2017 Ben Kurtovic"
__license__ = "MIT License" __license__ = "MIT License"
__version__ = "0.6.dev0" __version__ = "0.6.dev0"
__email__ = "ben.kurtovic@gmail.com" __email__ = "ben.kurtovic@gmail.com"


+ 3
- 1
mwparserfromhell/nodes/template.py View File

@@ -101,7 +101,7 @@ class Template(Node):
values = tuple(theories.values()) values = tuple(theories.values())
best = max(values) best = max(values)
confidence = float(best) / sum(values) confidence = float(best) / sum(values)
if confidence >= 0.75:
if confidence > 0.5:
return tuple(theories.keys())[values.index(best)] return tuple(theories.keys())[values.index(best)]


@staticmethod @staticmethod
@@ -130,6 +130,8 @@ class Template(Node):
before_theories = defaultdict(lambda: 0) before_theories = defaultdict(lambda: 0)
after_theories = defaultdict(lambda: 0) after_theories = defaultdict(lambda: 0)
for param in self.params: for param in self.params:
if not param.showkey:
continue
if use_names: if use_names:
component = str(param.name) component = str(param.name)
else: else:


+ 227
- 2
tests/test_template.py View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2012-2017 Ben Kurtovic <ben.kurtovic@gmail.com>
# #
# Permission is hereby granted, free of charge, to any person obtaining a copy # Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal # of this software and associated documentation files (the "Software"), to deal
@@ -30,6 +30,8 @@ except ImportError:
from mwparserfromhell.compat import str from mwparserfromhell.compat import str
from mwparserfromhell.nodes import HTMLEntity, Template, Text from mwparserfromhell.nodes import HTMLEntity, Template, Text
from mwparserfromhell.nodes.extras import Parameter from mwparserfromhell.nodes.extras import Parameter
from mwparserfromhell import parse

from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext


pgens = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=True) pgens = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=True)
@@ -287,7 +289,7 @@ class TestTemplate(TreeEqualityTestCase):
self.assertIsInstance(node12.params[1].value.get(1), HTMLEntity) self.assertIsInstance(node12.params[1].value.get(1), HTMLEntity)
self.assertEqual("{{a|\nb = c|\nd = e|\nf = g|\nh = i}}", node13) self.assertEqual("{{a|\nb = c|\nd = e|\nf = g|\nh = i}}", node13)
self.assertEqual("{{a\n|b =c\n|d = e|f =g\n|h = i\n|j =k\n}}", node14) self.assertEqual("{{a\n|b =c\n|d = e|f =g\n|h = i\n|j =k\n}}", node14)
self.assertEqual("{{a|b = c\n|\nd = e|\nf =g |h =i}}", node15)
self.assertEqual("{{a|b = c\n|\nd = e|\nf =g |\nh = i}}", node15)
self.assertEqual("{{a|\nb = c|\nd = e|\nf = g|h=i}}", node16) self.assertEqual("{{a|\nb = c|\nd = e|\nf = g|h=i}}", node16)
self.assertEqual("{{a|b|c}}", node17) self.assertEqual("{{a|b|c}}", node17)
self.assertEqual("{{a|b|3=c}}", node18) self.assertEqual("{{a|b|3=c}}", node18)
@@ -439,5 +441,228 @@ class TestTemplate(TreeEqualityTestCase):
self.assertEqual("{{foo|a=b|c=d|e=f|a=|a=b}}", node26) self.assertEqual("{{foo|a=b|c=d|e=f|a=|a=b}}", node26)
self.assertRaises(ValueError, node27.remove, node28.get(1)) self.assertRaises(ValueError, node27.remove, node28.get(1))


def test_formatting(self):
"""test realistic param manipulation with complex whitespace formatting
(assumes that parsing works correctly)

Each entry in ``tests`` is an ``(original, expected)`` pair of wikitext.
The URL comment above each pair presumably identifies the Wikipedia
revision the original text was taken from (not verifiable from here).

For every pair the original is parsed, the first template found is
modified with the same three operations (set ``pop``, add
``census estimate yr`` before ``pop``, remove ``census yr``), and the
re-serialized text must equal ``expected`` exactly, whitespace included.
"""
tests = [
# https://en.wikipedia.org/w/index.php?title=Lamar_County,_Georgia&oldid=792356004
("""{{Infobox U.S. county
| county = Lamar County
| state = Georgia
| seal =
| founded = 1920
| seat wl = Barnesville
| largest city wl = Barnesville
| area_total_sq_mi = 186
| area_land_sq_mi = 184
| area_water_sq_mi = 2.3
| area percentage = 1.3%
| census yr = 2010
| pop = 18317
| density_sq_mi = 100
| time zone = Eastern
| footnotes =
| web = www.lamarcountyga.com
| ex image = Lamar County Georgia Courthouse.jpg
| ex image cap = Lamar County courthouse in Barnesville
| district = 3rd
| named for = [[Lucius Quintus Cincinnatus Lamar II]]
}}""", """{{Infobox U.S. county
| county = Lamar County
| state = Georgia
| seal =
| founded = 1920
| seat wl = Barnesville
| largest city wl = Barnesville
| area_total_sq_mi = 186
| area_land_sq_mi = 184
| area_water_sq_mi = 2.3
| area percentage = 1.3%
| census estimate yr = 2016
| pop = 12345<ref>example ref</ref>
| density_sq_mi = 100
| time zone = Eastern
| footnotes =
| web = www.lamarcountyga.com
| ex image = Lamar County Georgia Courthouse.jpg
| ex image cap = Lamar County courthouse in Barnesville
| district = 3rd
| named for = [[Lucius Quintus Cincinnatus Lamar II]]
}}"""),
# https://en.wikipedia.org/w/index.php?title=Rockdale_County,_Georgia&oldid=792359760
("""{{Infobox U.S. County|
county = Rockdale County |
state = Georgia |
seal = |
founded = October 18, 1870 |
seat wl = Conyers |
largest city wl = Conyers |
area_total_sq_mi = 132 |
area_land_sq_mi = 130 |
area_water_sq_mi = 2.3 |
area percentage = 1.7% |
census yr = 2010|
pop = 85215 |
density_sq_mi = 657 |
web = www.rockdalecounty.org
| ex image = Rockdale-county-courthouse.jpg
| ex image cap = Rockdale County Courthouse in Conyers
| district = 4th
| time zone= Eastern
}}""", """{{Infobox U.S. County|
county = Rockdale County |
state = Georgia |
seal = |
founded = October 18, 1870 |
seat wl = Conyers |
largest city wl = Conyers |
area_total_sq_mi = 132 |
area_land_sq_mi = 130 |
area_water_sq_mi = 2.3 |
area percentage = 1.7% |
census estimate yr = 2016 |
pop = 12345<ref>example ref</ref> |
density_sq_mi = 657 |
web = www.rockdalecounty.org
| ex image = Rockdale-county-courthouse.jpg
| ex image cap = Rockdale County Courthouse in Conyers
| district = 4th
| time zone= Eastern
}}"""),
# https://en.wikipedia.org/w/index.php?title=Spalding_County,_Georgia&oldid=792360413
# NOTE(review): most lines in this input both start and end with "|", which
# presumably yields empty unnamed params between the named ones and explains
# the unusual layout of the expected text -- confirm against the parser.
("""{{Infobox U.S. County|
| county = Spalding County |
| state = Georgia |
| seal = |
| founded = 1851 |
| seat wl = Griffin |
| largest city wl = Griffin |
| area_total_sq_mi = 200 |
| area_land_sq_mi = 196 |
| area_water_sq_mi = 3.1 |
| area percentage = 1.6% |
| census yr = 2010|
| pop = 64073 |
| density_sq_mi = 326 |
| web = www.spaldingcounty.com |
| named for = [[Thomas Spalding]]
| ex image = Spalding County Courthouse (NE corner).JPG
| ex image cap = Spalding County Courthouse in Griffin
| district = 3rd
| time zone = Eastern
}}""", """{{Infobox U.S. County|
| county = Spalding County |
| state = Georgia |
| seal = |
| founded = 1851 |
| seat wl = Griffin |
| largest city wl = Griffin |
| area_total_sq_mi = 200 |
| area_land_sq_mi = 196 |
| area_water_sq_mi = 3.1 |
| area percentage = 1.6% |
|
| census estimate yr = 2016 | pop = 12345<ref>example ref</ref> |
| density_sq_mi = 326 |
| web = www.spaldingcounty.com |
| named for = [[Thomas Spalding]]
| ex image = Spalding County Courthouse (NE corner).JPG
| ex image cap = Spalding County Courthouse in Griffin
| district = 3rd
| time zone = Eastern
}}"""),
# https://en.wikipedia.org/w/index.php?title=Clinton_County,_Illinois&oldid=794694648
("""{{Infobox U.S. county
|county = Clinton County
|state = Illinois
| ex image = File:Clinton County Courthouse, Carlyle.jpg
| ex image cap = [[Clinton County Courthouse (Illinois)|Clinton County Courthouse]]
|seal =
|founded = 1824
|named for = [[DeWitt Clinton]]
|seat wl= Carlyle
| largest city wl = Breese
|time zone=Central
|area_total_sq_mi = 503
|area_land_sq_mi = 474
|area_water_sq_mi = 29
|area percentage = 5.8%
|census yr = 2010
|pop = 37762
|density_sq_mi = 80
|web = www.clintonco.illinois.gov
| district = 15th
}}""", """{{Infobox U.S. county
|county = Clinton County
|state = Illinois
| ex image = File:Clinton County Courthouse, Carlyle.jpg
| ex image cap = [[Clinton County Courthouse (Illinois)|Clinton County Courthouse]]
|seal =
|founded = 1824
|named for = [[DeWitt Clinton]]
|seat wl= Carlyle
| largest city wl = Breese
|time zone=Central
|area_total_sq_mi = 503
|area_land_sq_mi = 474
|area_water_sq_mi = 29
|area percentage = 5.8%
|census estimate yr = 2016
|pop = 12345<ref>example ref</ref>
|density_sq_mi = 80
|web = www.clintonco.illinois.gov
| district = 15th
}}"""),
# https://en.wikipedia.org/w/index.php?title=Winnebago_County,_Illinois&oldid=789193800
("""{{Infobox U.S. county |
county = Winnebago County |
state = Illinois |
seal = Winnebago County il seal.png |
named for = [[Winnebago (tribe)|Winnebago Tribe]] |
seat wl= Rockford |
largest city wl = Rockford|
area_total_sq_mi = 519 |
area_land_sq_mi = 513|
area_water_sq_mi = 5.9 |
area percentage = 1.1% |
census yr = 2010|
pop = 295266 |
density_sq_mi = 575
| web = www.wincoil.us
| founded year = 1836
| founded date = January 16
| time zone = Central
| district = 16th
| district2 = 17th
}}""", """{{Infobox U.S. county |
county = Winnebago County |
state = Illinois |
seal = Winnebago County il seal.png |
named for = [[Winnebago (tribe)|Winnebago Tribe]] |
seat wl= Rockford |
largest city wl = Rockford|
area_total_sq_mi = 519 |
area_land_sq_mi = 513|
area_water_sq_mi = 5.9 |
area percentage = 1.1% |
census estimate yr = 2016|
pop = 12345<ref>example ref</ref> |
density_sq_mi = 575
| web = www.wincoil.us
| founded year = 1836
| founded date = January 16
| time zone = Central
| district = 16th
| district2 = 17th
}}""")]

# The same three edits are applied to every fixture; their order matters
# because the second call positions the new parameter relative to "pop".
for (original, expected) in tests:
code = parse(original)
# Only the first template found in the parsed text is exercised.
template = code.filter_templates()[0]
# "pop" already exists in every original; the expected texts show its value
# replaced in place with the surrounding whitespace kept.
template.add("pop", "12345<ref>example ref</ref>")
# Insert the new parameter directly before "pop".
template.add('census estimate yr', "2016", before="pop")
# Remove the old parameter; in the expected texts the new one sits where
# "census yr" used to be.
template.remove("census yr")
# Compare the whole re-serialized document, not just the template node.
self.assertEqual(expected, str(code))

if __name__ == "__main__": if __name__ == "__main__":
unittest.main(verbosity=2) unittest.main(verbosity=2)

Loading…
Cancel
Save