@@ -1,22 +1,21 @@ | |||
mwtemplateparserfromhell | |||
mwparserfromhell | |||
======================== | |||
**mwtemplateparserfromhell** (the *MediaWiki Template Parser from Hell*) is a | |||
Python package that provides an easy-to-use and outrageously powerful template | |||
parser for MediaWiki_ wikicode. | |||
**mwparserfromhell** (the *MediaWiki Parser from Hell*) is a Python package | |||
that provides an easy-to-use and outrageously powerful parser for MediaWiki_ | |||
wikicode. | |||
Coded by Earwig_ and named by `Σ`_. | |||
Developed by Earwig_ and named by `Σ`_. | |||
Installation | |||
------------ | |||
The easiest way to install the parser is through the `Python Package Index`_, | |||
so you can install the latest release with ``pip install | |||
mwtemplateparserfromhell`` (`get pip`_). Alternatively, get the latest | |||
development version:: | |||
so you can install the latest release with ``pip install mwparserfromhell`` | |||
(`get pip`_). Alternatively, get the latest development version:: | |||
git clone git://github.com/earwig/mwtemplateparserfromhell.git mwtemplateparserfromhell | |||
cd mwtemplateparserfromhell | |||
git clone git://github.com/earwig/mwparserfromhell.git mwparserfromhell | |||
cd mwparserfromhell | |||
python setup.py install | |||
You can run the comprehensive unit testing suite with ``python setup.py test``. | |||
@@ -26,25 +25,28 @@ Usage | |||
Normal usage is rather straightforward (where ``text`` is page text):: | |||
>>> import mwtemplateparserfromhell | |||
>>> parser = mwtemplateparserfromhell.Parser() | |||
>>> import mwparserfromhell | |||
>>> parser = mwparserfromhell.Parser() | |||
>>> templates = parser.parse(text) | |||
``templates`` is a list of ``mwtemplateparserfromhell.Template`` objects, which | |||
contain a ``name`` attribute, a ``params`` attribute, and a ``get()`` method. | |||
For example:: | |||
``templates`` is a list of ``mwparserfromhell.Template`` objects, which contain | |||
a ``name`` attribute, a ``params`` attribute, and a ``render()`` method. | |||
Individual parameters are retrieved by subscripting a template with either a | |||
position or a name. For example:: | |||
>>> templates = parser.parse("{{foo|bar|baz|eggs=spam}}") | |||
>>> print templates | |||
[Template(name="foo", params={"1": "bar", "2": "baz", "eggs": "spam"})] | |||
>>> print templates[0].name | |||
>>> template = templates[0] | |||
>>> print template.name | |||
foo | |||
>>> print templates[0].params | |||
>>> print template.params | |||
['bar', 'baz'] | |||
>>> print templates[0].get(0) | |||
>>> print template[0] | |||
bar | |||
>>> print templates[0].get("eggs") | |||
>>> print template["eggs"] | |||
spam | |||
>>> print template.render() | |||
{{foo|bar|baz|eggs=spam}} | |||
If the subscript is a number *n*, the parameter at zero-based position *n* is | |||
returned; otherwise the parameter with the given name is returned. Unnamed parameters | |||
@@ -66,19 +68,19 @@ By default, nested templates are supported like so:: | |||
Integration | |||
----------- | |||
``mwtemplateparserfromhell`` is used by and originally developed for | |||
EarwigBot_; ``Page`` objects have a ``parse_templates`` method that essentially | |||
calls ``Parser().parse()`` on ``page.get()``. | |||
``mwparserfromhell`` is used by and originally developed for EarwigBot_; | |||
``Page`` objects have a ``parse_templates`` method that essentially calls | |||
``Parser().parse()`` on ``page.get()``. | |||
If you're using PyWikipedia_, your code might look like this:: | |||
import mwtemplateparserfromhell | |||
import mwparserfromhell | |||
import wikipedia as pywikibot | |||
def parse_templates(title): | |||
site = pywikibot.get_site() | |||
page = pywikibot.Page(site, title) | |||
text = page.get() | |||
parser = mwtemplateparserfromhell.Parser() | |||
parser = mwparserfromhell.Parser() | |||
return parser.parse(text) | |||
If you're not using a library, you can parse templates in any page using the | |||
@@ -86,13 +88,13 @@ following code (via the API_):: | |||
import json | |||
import urllib | |||
import mwtemplateparserfromhell | |||
import mwparserfromhell | |||
API_URL = "http://en.wikipedia.org/w/api.php" | |||
def parse_templates(title): | |||
raw = urllib.urlopen(API_URL, data).read() | |||
res = json.loads(raw) | |||
text = res["query"]["pages"].values()[0]["revisions"][0]["*"] | |||
parser = mwtemplateparserfromhell.Parser() | |||
parser = mwparserfromhell.Parser() | |||
return parser.parse(text) | |||
.. _MediaWiki: http://mediawiki.org | |||
@@ -21,11 +21,9 @@ | |||
# SOFTWARE. | |||
""" | |||
`mwtemplateparserfromhell | |||
<https://github.com/earwig/mwtemplateparserfromhell>`_ (the MediaWiki Template | |||
`mwparserfromhell <https://github.com/earwig/mwparserfromhell>`_ (the MediaWiki | |||
Parser from Hell) is a Python package that provides an easy-to-use and | |||
outrageously powerful template parser for `MediaWiki <http://mediawiki.org>`_ | |||
wikicode. | |||
outrageously powerful parser for `MediaWiki <http://mediawiki.org>`_ wikicode. | |||
""" | |||
__author__ = "Ben Kurtovic" | |||
@@ -34,5 +32,5 @@ __license__ = "MIT License" | |||
__version__ = "0.1.dev" | |||
__email__ = "ben.kurtovic@verizon.net" | |||
from mwtemplateparserfromhell import parameter, parser, template | |||
from mwtemplateparserfromhell.parser import Parser | |||
from mwparserfromhell import parameter, parser, template | |||
from mwparserfromhell.parser import Parser |
@@ -20,6 +20,8 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from mwparserfromhell.template import Template | |||
__all__ = ["Parameter"] | |||
class Parameter(object): | |||
@@ -49,13 +51,13 @@ class Parameter(object): | |||
def __eq__(self, other): | |||
if isinstance(other, Parameter): | |||
return (self.value == other.value and | |||
return (self.name == other.name and self.value == other.value and | |||
self.templates == other.templates) | |||
return self.value == other | |||
def __ne__(self, other): | |||
if isinstance(other, Parameter): | |||
return (self.value != other.value or | |||
return (self.name != other.name or self.value != other.value or | |||
self.templates != other.templates) | |||
return self.value != other | |||
@@ -79,9 +81,48 @@ class Parameter(object): | |||
for char in self.value: | |||
yield char | |||
def __getitem__(self, key):
    """Return ``self.value[key]``, i.e. subscript the parameter's value."""
    text = self.value
    return text[key]
def __contains__(self, item):
    """Report whether *item* is in the value or in the templates list."""
    if item in self.value:
        return True
    return item in self.templates
def __add__(self, other):
    """Implement ``self + other``.

    Adding a ``Parameter`` or a ``Template`` produces a new ``Parameter``
    carrying this parameter's name, the concatenated value and the combined
    template list. Any other operand is added to ``self.value`` directly and
    that result is returned unchanged.
    """
    if isinstance(other, Parameter):
        extra_value, extra_templates = other.value, other.templates
    elif isinstance(other, Template):
        # A template contributes its rendered text plus itself.
        extra_value, extra_templates = other.render(), [other]
    else:
        return self.value + other
    return Parameter(self.name, self.value + extra_value,
                     self.templates + extra_templates)
def __radd__(self, other):
    """Implement ``other + self``.

    With a ``Template`` on the left, return a new ``Template`` with the same
    name whose parameter list has this parameter appended. Otherwise return
    ``other + self.value``.
    """
    if not isinstance(other, Template):
        return other + self.value
    extended = other.params + [self]
    return Template(other.name, extended)
def __iadd__(self, other):
    """Implement ``self += other`` by mutating this parameter in place.

    A ``Parameter`` contributes its value and templates; a ``Template``
    contributes its rendered text and itself; anything else is added to the
    value only. Always returns ``self``.
    """
    if isinstance(other, Parameter):
        self.value += other.value
        self.templates += other.templates
        return self
    if isinstance(other, Template):
        self.value += other.render()
        self.templates.append(other)
        return self
    self.value += other
    return self
def __mul__(self, other):
    """Implement ``self * other``: a new ``Parameter`` with the same name
    whose value and template list are each repeated *other* times."""
    repeated_value = self.value * other
    repeated_templates = self.templates * other
    return Parameter(self.name, repeated_value, repeated_templates)
def __rmul__(self, other):
    """Implement ``other * self``: a new ``Parameter`` with the same name
    whose value and template list are each repeated *other* times."""
    repeated_value = other * self.value
    repeated_templates = other * self.templates
    return Parameter(self.name, repeated_value, repeated_templates)
def __imul__(self, other):
    """Implement ``self *= other``: repeat the value and the template list
    in place and return ``self``."""
    for attr in ("value", "templates"):
        # Spelled-out augmented assignment: read, multiply in place, store.
        current = getattr(self, attr)
        current *= other
        setattr(self, attr, current)
    return self
@property | |||
def name(self): | |||
return self._name |
@@ -20,8 +20,8 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from mwtemplateparserfromhell.parameter import Parameter | |||
from mwtemplateparserfromhell.template import Template | |||
from mwparserfromhell.parameter import Parameter | |||
from mwparserfromhell.template import Template | |||
__all__ = ["Parser"] | |||
@@ -41,13 +41,30 @@ class Template(object): | |||
def __eq__(self, other): | |||
if isinstance(other, Template): | |||
return self.name == other.name and self._params == other._params | |||
return False | |||
return self.name == other.name and self.params == other.params | |||
return self.render() == other | |||
def __ne__(self, other): | |||
if isinstance(other, Template): | |||
return self.name != other.name or self._params != other._params | |||
return True | |||
return self.name != other.name or self.params != other.params | |||
return self.render() != other | |||
def __getitem__(self, key):
    """Return the parameter selected by *key*.

    *key* is first looked up as a parameter name. If that fails and *key* is
    an integer or a slice, it is applied to the parameters in their stored
    order instead.

    Raises ``KeyError`` when a non-integer name is not present, and
    ``IndexError`` when an integer position is out of range.
    """
    try:
        return self._params[key]
    except KeyError:
        # Only integers and slices can fall back to positional lookup; a
        # missing name must stay a KeyError rather than turn into the
        # TypeError that indexing a list with a string would raise.
        if not isinstance(key, (int, slice)):
            raise
    except TypeError:
        # Slices are unhashable on some Python versions, so the mapping
        # lookup itself may reject them before a KeyError can occur.
        if not isinstance(key, slice):
            raise
    # list() keeps this working where values() returns a view, not a list.
    return list(self._params.values())[key]
def __setitem__(self, key, value):
    """Store *value* under *key*.

    A non-integer *key* is treated as a parameter name and assigned
    directly. An integer *key* is treated as a position:

    * a position past ``len(self._params)`` raises ``IndexError``;
    * a position equal to ``len(self._params)`` adds the value at the end,
      stored under *key* itself;
    * any other position replaces the value of the parameter currently at
      that position (negative positions count from the end).
    """
    if not isinstance(key, int):
        self._params[key] = value
        return

    count = len(self._params)
    if key > count:
        raise IndexError("Index is too large")
    if key == count:  # Simple addition to the end
        self._params[key] = value
        return

    # Overwriting an existing key of an ordered mapping keeps its position,
    # so replacing by position only needs the name stored at that position;
    # no rebuild of the mapping is required.
    names = list(self._params)
    self._params[names[key]] = value
@property | |||
def name(self): | |||
@@ -57,8 +74,11 @@ class Template(object): | |||
def params(self): | |||
return self._params.values() | |||
def get(self, name): | |||
try: | |||
return self._params[name] | |||
except KeyError: # Try lookup by order in param list | |||
return self._params.values()[name] | |||
def render(self):
    """Return this template as wikicode text, ``{{name|...}}``.

    A parameter is written as just its value when its name consists only of
    digits and its value contains no ``=``; otherwise it is written as
    ``name=value``.
    """
    pieces = [self.name]
    for param in self.params:
        positional = param.name.isdigit() and "=" not in param.value
        if positional:
            pieces.append(param.value)
        else:
            pieces.append(param.name + "=" + param.value)
    return "{{" + "|".join(pieces) + "}}"
@@ -23,23 +23,23 @@ | |||
from setuptools import setup, find_packages | |||
from mwtemplateparserfromhell import __version__ | |||
from mwparserfromhell import __version__ | |||
with open("README.rst") as fp: | |||
long_docs = fp.read() | |||
setup( | |||
name = "mwtemplateparserfromhell", | |||
name = "mwparserfromhell", | |||
packages = find_packages(exclude=("tests",)), | |||
test_suite = "tests", | |||
version = __version__, | |||
author = "Ben Kurtovic", | |||
author_email = "ben.kurtovic@verizon.net", | |||
url = "https://github.com/earwig/mwtemplateparserfromhell", | |||
description = "MWTemplateParserFromHell is a parser for MediaWiki templates.", | |||
url = "https://github.com/earwig/mwparserfromhell", | |||
description = "MWParserFromHell is a parser for MediaWiki wikicode.", | |||
long_description = long_docs, | |||
download_url = "https://github.com/earwig/mwtemplateparserfromhell/tarball/v{0}".format(__version__), | |||
keywords = "earwig mwtemplateparserfromhell wikipedia wiki mediawiki template parsing", | |||
download_url = "https://github.com/earwig/mwparserfromhell/tarball/v{0}".format(__version__), | |||
keywords = "earwig mwparserfromhell wikipedia wiki mediawiki wikicode template parsing", | |||
license = "MIT License", | |||
classifiers = [ | |||
"Development Status :: 3 - Alpha", | |||
@@ -22,8 +22,8 @@ | |||
import unittest | |||
from mwtemplateparserfromhell.parameter import Parameter | |||
from mwtemplateparserfromhell.template import Template | |||
from mwparserfromhell.parameter import Parameter | |||
from mwparserfromhell.template import Template | |||
class TestParameter(unittest.TestCase): | |||
def setUp(self): | |||
@@ -93,7 +93,27 @@ class TestParameter(unittest.TestCase): | |||
self.assertEquals(bool(param), bool(param.value)) | |||
self.assertEquals(len(param), len(param.value)) | |||
self.assertEquals(list(param), list(param.value)) | |||
self.assertEquals(param[2], param.value[2]) | |||
self.assertEquals(list(reversed(param)), | |||
list(reversed(param.value))) | |||
self.assertIs("bar" in param, "bar" in param.value) | |||
self.assertEquals(param + "test", param.value + "test") | |||
self.assertEquals("test" + param, "test" + param.value) | |||
# add param | |||
# add template left | |||
# add template right | |||
self.assertEquals(param * 3, Parameter(param.name, param.value * 3, | |||
param.templates * 3)) | |||
self.assertEquals(3 * param, Parameter(param.name, 3 * param.value, | |||
3 * param.templates)) | |||
# add param inplace | |||
# add template inplace | |||
# add str inplace | |||
# multiply int inplace | |||
self.assertIsInstance(param, Parameter) | |||
self.assertIsInstance(param.value, str) | |||
if __name__ == "__main__": | |||
unittest.main(verbosity=2) |
@@ -22,9 +22,9 @@ | |||
import unittest | |||
from mwtemplateparserfromhell.parameter import Parameter | |||
from mwtemplateparserfromhell.parser import Parser | |||
from mwtemplateparserfromhell.template import Template | |||
from mwparserfromhell.parameter import Parameter | |||
from mwparserfromhell.parser import Parser | |||
from mwparserfromhell.template import Template | |||
TESTS = [ | |||
("", []), | |||
@@ -37,7 +37,6 @@ TESTS = [ | |||
("multiple {{-}} templates {{+}}!", [Template("-"), Template("+")]), | |||
("{{{no templates here}}}", []), | |||
("{ {{templates here}}}", [Template("templates here")]), | |||
("{{{{I exist}} }}", [Template("I exist")]), | |||
("{{{{I do not exist}}}}", []), | |||
("{{foo|bar|baz|eggs=spam}}", | |||
[Template("foo", [Parameter("1", "bar"), Parameter("2", "baz"), | |||
@@ -46,6 +45,12 @@ TESTS = [ | |||
[Template("abc def", [Parameter("1", "ghi"), Parameter("jk", "lmno"), | |||
Parameter("2", "pqr"), Parameter("st", "uv"), | |||
Parameter("3", "wx"), Parameter("4", "yz")])]), | |||
("{{this has a|{{template}}|inside of it}}", | |||
[Template("this has a", [Parameter("1", "{{template}}", | |||
[Template("template")]), | |||
Parameter("2", "inside of it")])]), | |||
("{{{{I exist}} }}", [Template("I exist", [] )]), | |||
("{{}}") | |||
] | |||
class TestParser(unittest.TestCase): | |||
@@ -23,8 +23,8 @@ | |||
from itertools import permutations | |||
import unittest | |||
from mwtemplateparserfromhell.parameter import Parameter | |||
from mwtemplateparserfromhell.template import Template | |||
from mwparserfromhell.parameter import Parameter | |||
from mwparserfromhell.template import Template | |||
class TestTemplate(unittest.TestCase): | |||
def setUp(self): | |||
@@ -57,14 +57,22 @@ class TestTemplate(unittest.TestCase): | |||
Template(name=self.name, params=self.params)): | |||
self.assertEqual(template.params, self.params) | |||
def test_get(self): | |||
def test_getitem(self): | |||
template = Template(name=self.name, params=self.params) | |||
self.assertIs(template.get(0), self.bar) | |||
self.assertIs(template.get(1), self.baz) | |||
self.assertIs(template.get(2), self.eggs) | |||
self.assertIs(template.get("1"), self.bar) | |||
self.assertIs(template.get("2"), self.baz) | |||
self.assertIs(template.get("eggs"), self.eggs) | |||
self.assertIs(template[0], self.bar) | |||
self.assertIs(template[1], self.baz) | |||
self.assertIs(template[2], self.eggs) | |||
self.assertIs(template["1"], self.bar) | |||
self.assertIs(template["2"], self.baz) | |||
self.assertIs(template["eggs"], self.eggs) | |||
def test_render(self):
    """Check ``render()`` for a template without and with parameters."""
    bare = Template(self.name)
    populated = Template(self.name, self.params)
    self.assertEqual(bare.render(), "{{foo}}")
    self.assertEqual(populated.render(), "{{foo|bar|baz|eggs=spam}}")
def test_repr(self): | |||
correct1= 'Template(name=foo, params={})' | |||