From a880ff8bf2723da212dfc5b71cc3f92d498e6c06 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 21 May 2012 00:09:29 -0400 Subject: [PATCH] mwtemplateparserfromhell -> mwparserfromhell; some additions; still incomplete --- README.rst | 54 +++++++++++----------- .../__init__.py | 10 ++-- .../parameter.py | 45 +++++++++++++++++- .../parser.py | 4 +- .../template.py | 38 +++++++++++---- setup.py | 12 ++--- tests/test_parameter.py | 24 +++++++++- tests/test_parser.py | 13 ++++-- tests/test_template.py | 26 +++++++---- 9 files changed, 160 insertions(+), 66 deletions(-) rename {mwtemplateparserfromhell => mwparserfromhell}/__init__.py (81%) rename {mwtemplateparserfromhell => mwparserfromhell}/parameter.py (65%) rename {mwtemplateparserfromhell => mwparserfromhell}/parser.py (92%) rename {mwtemplateparserfromhell => mwparserfromhell}/template.py (65%) diff --git a/README.rst b/README.rst index c90c76c..c60fda4 100644 --- a/README.rst +++ b/README.rst @@ -1,22 +1,21 @@ -mwtemplateparserfromhell +mwparserfromhell ======================== -**mwtemplateparserfromhell** (the *MediaWiki Template Parser from Hell*) is a -Python package that provides an easy-to-use and outrageously powerful template -parser for MediaWiki_ wikicode. +**mwparserfromhell** (the *MediaWiki Parser from Hell*) is a Python package +that provides an easy-to-use and outrageously powerful parser for MediaWiki_ +wikicode. -Coded by Earwig_ and named by `Σ`_. +Developed by Earwig_ and named by `Σ`_. Installation ------------ The easiest way to install the parser is through the `Python Package Index`_, -so you can install the latest release with ``pip install -mwtemplateparserfromhell`` (`get pip`_). Alternatively, get the latest -development version:: +so you can install the latest release with ``pip install mwparserfromhell`` +(`get pip`_). 
Alternatively, get the latest development version:: - git clone git://github.com/earwig/mwtemplateparserfromhell.git mwtemplateparserfromhell - cd mwtemplateparserfromhell + git clone git://github.com/earwig/mwparserfromhell.git mwparserfromhell + cd mwparserfromhell python setup.py install You can run the comprehensive unit testing suite with ``python setup.py test``. @@ -26,25 +25,28 @@ Usage Normal usage is rather straightforward (where ``text`` is page text):: - >>> import mwtemplateparserfromhell - >>> parser = mwtemplateparserfromhell.Parser() + >>> import mwparserfromhell + >>> parser = mwparserfromhell.Parser() >>> templates = parser.parse(text) -``templates`` is a list of ``mwtemplateparserfromhell.Template`` objects, which -contain a ``name`` attribute, a ``params`` attribute, and a ``get()`` method. -For example:: +``templates`` is a list of ``mwparserfromhell.Template`` objects, which contain +a ``name`` attribute, a ``params`` attribute, and a ``render()`` method. +Indexing is supported to get parameters. For example:: >>> templates = parser.parse("{{foo|bar|baz|eggs=spam}}") >>> print templates [Template(name="foo", params={"1": "bar", "2": "baz", "eggs": "spam"})] - >>> print templates[0].name + >>> template = templates[0] + >>> print template.name foo - >>> print templates[0].params + >>> print template.params ['bar', 'baz'] - >>> print templates[0].get(0) + >>> print template[0] bar - >>> print templates[0].get("eggs") + >>> print template["eggs"] spam + >>> print template.render() + {{foo|bar|baz|eggs=spam}} If ``get``\ 's argument is a number *n*, it'll return the *n*\ th parameter, otherwise it will return the parameter with the given name.
Unnamed parameters @@ -66,19 +68,19 @@ By default, nested templates are supported like so:: Integration ----------- -``mwtemplateparserfromhell`` is used by and originally developed for -EarwigBot_; ``Page`` objects have a ``parse_templates`` method that essentially -calls ``Parser().parse()`` on ``page.get()``. +``mwparserfromhell`` is used by and originally developed for EarwigBot_; +``Page`` objects have a ``parse_templates`` method that essentially calls +``Parser().parse()`` on ``page.get()``. If you're using PyWikipedia_, your code might look like this:: - import mwtemplateparserfromhell + import mwparserfromhell import wikipedia as pywikibot def parse_templates(title): site = pywikibot.get_site() page = pywikibot.Page(site, title) text = page.get() - parser = mwtemplateparserfromhell.Parser() + parser = mwparserfromhell.Parser() return parser.parse(text) If you're not using a library, you can parse templates in any page using the @@ -86,13 +88,13 @@ following code (via the API_):: import json import urllib - import mwtemplateparserfromhell + import mwparserfromhell API_URL = "http://en.wikipedia.org/w/api.php" def parse_templates(title): raw = urllib.urlopen(API_URL, data).read() res = json.loads(raw) text = res["query"]["pages"].values()[0]["revisions"][0]["*"] - parser = mwtemplateparserfromhell.Parser() + parser = mwparserfromhell.Parser() return parser.parse(text) .. _MediaWiki: http://mediawiki.org diff --git a/mwtemplateparserfromhell/__init__.py b/mwparserfromhell/__init__.py similarity index 81% rename from mwtemplateparserfromhell/__init__.py rename to mwparserfromhell/__init__.py index 49370ed..8c7be05 100644 --- a/mwtemplateparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -21,11 +21,9 @@ # SOFTWARE. """ -`mwtemplateparserfromhell -`_ (the MediaWiki Template +`mwparserfromhell `_ (the MediaWiki Parser from Hell) is a Python package that provides an easy-to-use and -outrageously powerful template parser for `MediaWiki `_ -wikicode. 
+outrageously powerful parser for `MediaWiki `_ wikicode. """ __author__ = "Ben Kurtovic" @@ -34,5 +32,5 @@ __license__ = "MIT License" __version__ = "0.1.dev" __email__ = "ben.kurtovic@verizon.net" -from mwtemplateparserfromhell import parameter, parser, template -from mwtemplateparserfromhell.parser import Parser +from mwparserfromhell import parameter, parser, template +from mwparserfromhell.parser import Parser diff --git a/mwtemplateparserfromhell/parameter.py b/mwparserfromhell/parameter.py similarity index 65% rename from mwtemplateparserfromhell/parameter.py rename to mwparserfromhell/parameter.py index 1d62e13..e323e5f 100644 --- a/mwtemplateparserfromhell/parameter.py +++ b/mwparserfromhell/parameter.py @@ -20,6 +20,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +from mwparserfromhell.template import Template + __all__ = ["Parameter"] class Parameter(object): @@ -49,13 +51,13 @@ class Parameter(object): def __eq__(self, other): if isinstance(other, Parameter): - return (self.value == other.value and + return (self.name == other.name and self.value == other.value and self.templates == other.templates) return self.value == other def __ne__(self, other): if isinstance(other, Parameter): - return (self.value != other.value or + return (self.name != other.name or self.value != other.value or self.templates != other.templates) return self.value != other @@ -79,9 +81,48 @@ class Parameter(object): for char in self.value: yield char + def __getitem__(self, key): + return self.value[key] + def __contains__(self, item): return item in self.value or item in self.templates + def __add__(self, other): + if isinstance(other, Parameter): + return Parameter(self.name, self.value + other.value, + self.templates + other.templates) + if isinstance(other, Template): + return Parameter(self.name, self.value + other.render(), + self.templates + [other]) + return self.value + other + + def __radd__(self, other): + if 
isinstance(other, Template): + return Template(other.name, other.params + [self]) + return other + self.value + + def __iadd__(self, other): + if isinstance(other, Parameter): + self.value += other.value + self.templates += other.templates + elif isinstance(other, Template): + self.value += other.render() + self.templates.append(other) + else: + self.value += other + return self + + def __mul__(self, other): + return Parameter(self.name, self.value * other, self.templates * other) + + def __rmul__(self, other): + return Parameter(self.name, other * self.value, other * self.templates) + + def __imul__(self, other): + self.value *= other + self.templates *= other + return self + @property def name(self): return self._name diff --git a/mwtemplateparserfromhell/parser.py b/mwparserfromhell/parser.py similarity index 92% rename from mwtemplateparserfromhell/parser.py rename to mwparserfromhell/parser.py index 522a138..5f1622c 100644 --- a/mwtemplateparserfromhell/parser.py +++ b/mwparserfromhell/parser.py @@ -20,8 +20,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from mwtemplateparserfromhell.parameter import Parameter -from mwtemplateparserfromhell.template import Template +from mwparserfromhell.parameter import Parameter +from mwparserfromhell.template import Template __all__ = ["Parser"] diff --git a/mwtemplateparserfromhell/template.py b/mwparserfromhell/template.py similarity index 65% rename from mwtemplateparserfromhell/template.py rename to mwparserfromhell/template.py index 1f9c77c..7314ab7 100644 --- a/mwtemplateparserfromhell/template.py +++ b/mwparserfromhell/template.py @@ -41,13 +41,30 @@ class Template(object): def __eq__(self, other): if isinstance(other, Template): - return self.name == other.name and self._params == other._params - return False + return self.name == other.name and self.params == other.params + return self.render() == other def __ne__(self, other): if isinstance(other, Template): - return self.name != other.name or self._params != other._params - return True + return self.name != other.name or self.params != other.params + return self.render() != other + + def __getitem__(self, key): + try: + return self._params[key] + except KeyError: # Try lookup by order in param list + return self._params.values()[key] + + def __setitem__(self, key, value): + if isinstance(key, int): + if key > len(self._params): + raise IndexError("Index is too large") + elif key == len(self._params): # Simple addition to the end + self._params[key] = value + else: # We'll need to rebuild the OrderedDict + self._params + else: + self._params[key] = value @property def name(self): @@ -57,8 +74,11 @@ class Template(object): def params(self): return self._params.values() - def get(self, name): - try: - return self._params[name] - except KeyError: # Try lookup by order in param list - return self._params.values()[name] + def render(self): + params = "" + for param in self.params: + if param.name.isdigit() and "=" not in param.value: + params += "|" + param.value + else: + params += "|" + param.name + "=" + param.value + 
return "{{" + self.name + params + "}}" diff --git a/setup.py b/setup.py index 84d2dc9..bf1365b 100644 --- a/setup.py +++ b/setup.py @@ -23,23 +23,23 @@ from setuptools import setup, find_packages -from mwtemplateparserfromhell import __version__ +from mwparserfromhell import __version__ with open("README.rst") as fp: long_docs = fp.read() setup( - name = "mwtemplateparserfromhell", + name = "mwparserfromhell", packages = find_packages(exclude=("tests",)), test_suite = "tests", version = __version__, author = "Ben Kurtovic", author_email = "ben.kurtovic@verizon.net", - url = "https://github.com/earwig/mwtemplateparserfromhell", - description = "MWTemplateParserFromHell is a parser for MediaWiki templates.", + url = "https://github.com/earwig/mwparserfromhell", + description = "MWParserFromHell is a parser for MediaWiki wikicode.", long_description = long_docs, - download_url = "https://github.com/earwig/mwtemplateparserfromhell/tarball/v{0}".format(__version__), - keywords = "earwig mwtemplateparserfromhell wikipedia wiki mediawiki template parsing", + download_url = "https://github.com/earwig/mwparserfromhell/tarball/v{0}".format(__version__), + keywords = "earwig mwparserfromhell wikipedia wiki mediawiki wikicode template parsing", license = "MIT License", classifiers = [ "Development Status :: 3 - Alpha", diff --git a/tests/test_parameter.py b/tests/test_parameter.py index 80b94e1..46013ac 100644 --- a/tests/test_parameter.py +++ b/tests/test_parameter.py @@ -22,8 +22,8 @@ import unittest -from mwtemplateparserfromhell.parameter import Parameter -from mwtemplateparserfromhell.template import Template +from mwparserfromhell.parameter import Parameter +from mwparserfromhell.template import Template class TestParameter(unittest.TestCase): def setUp(self): @@ -93,7 +93,27 @@ class TestParameter(unittest.TestCase): self.assertEquals(bool(param), bool(param.value)) self.assertEquals(len(param), len(param.value)) self.assertEquals(list(param), list(param.value)) + 
self.assertEquals(param[2], param.value[2]) + self.assertEquals(list(reversed(param)), + list(reversed(param.value))) self.assertIs("bar" in param, "bar" in param.value) + self.assertEquals(param + "test", param.value + "test") + self.assertEquals("test" + param, "test" + param.value) + # add param + # add template left + # add template right + + self.assertEquals(param * 3, Parameter(param.name, param.value * 3, + param.templates * 3)) + self.assertEquals(3 * param, Parameter(param.name, 3 * param.value, + 3 * param.templates)) + + # add param inplace + # add template inplace + # add str inplace + # multiply int inplace + self.assertIsInstance(param, Parameter) + self.assertIsInstance(param.value, str) if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/tests/test_parser.py b/tests/test_parser.py index bbf12a5..66a9a32 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -22,9 +22,9 @@ import unittest -from mwtemplateparserfromhell.parameter import Parameter -from mwtemplateparserfromhell.parser import Parser -from mwtemplateparserfromhell.template import Template +from mwparserfromhell.parameter import Parameter +from mwparserfromhell.parser import Parser +from mwparserfromhell.template import Template TESTS = [ ("", []), @@ -37,7 +37,6 @@ TESTS = [ ("multiple {{-}} templates {{+}}!", [Template("-"), Template("+")]), ("{{{no templates here}}}", []), ("{ {{templates here}}}", [Template("templates here")]), - ("{{{{I exist}} }}", [Template("I exist")]), ("{{{{I do not exist}}}}", []), ("{{foo|bar|baz|eggs=spam}}", [Template("foo", [Parameter("1", "bar"), Parameter("2", "baz"), @@ -46,6 +45,12 @@ TESTS = [ [Template("abc def", [Parameter("1", "ghi"), Parameter("jk", "lmno"), Parameter("2", "pqr"), Parameter("st", "uv"), Parameter("3", "wx"), Parameter("4", "yz")])]), + ("{{this has a|{{template}}|inside of it}}", + [Template("this has a", [Parameter("1", "{{template}}", + [Template("template")]), + Parameter("2", "inside of it")])]), +
("{{{{I exist}} }}", [Template("I exist", [] )]), + ("{{}}") ] class TestParser(unittest.TestCase): diff --git a/tests/test_template.py b/tests/test_template.py index 0938e4a..ba15e6e 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -23,8 +23,8 @@ from itertools import permutations import unittest -from mwtemplateparserfromhell.parameter import Parameter -from mwtemplateparserfromhell.template import Template +from mwparserfromhell.parameter import Parameter +from mwparserfromhell.template import Template class TestTemplate(unittest.TestCase): def setUp(self): @@ -57,14 +57,22 @@ class TestTemplate(unittest.TestCase): Template(name=self.name, params=self.params)): self.assertEqual(template.params, self.params) - def test_get(self): + def test_getitem(self): template = Template(name=self.name, params=self.params) - self.assertIs(template.get(0), self.bar) - self.assertIs(template.get(1), self.baz) - self.assertIs(template.get(2), self.eggs) - self.assertIs(template.get("1"), self.bar) - self.assertIs(template.get("2"), self.baz) - self.assertIs(template.get("eggs"), self.eggs) + self.assertIs(template[0], self.bar) + self.assertIs(template[1], self.baz) + self.assertIs(template[2], self.eggs) + self.assertIs(template["1"], self.bar) + self.assertIs(template["2"], self.baz) + self.assertIs(template["eggs"], self.eggs) + + def test_render(self): + tests = [ + (Template(self.name), "{{foo}}"), + (Template(self.name, self.params), "{{foo|bar|baz|eggs=spam}}") + ] + for template, rendered in tests: + self.assertEqual(template.render(), rendered) def test_repr(self): correct1= 'Template(name=foo, params={})'