@@ -1,22 +1,21 @@ | |||||
mwtemplateparserfromhell | |||||
mwparserfromhell | |||||
======================== | ======================== | ||||
**mwtemplateparserfromhell** (the *MediaWiki Template Parser from Hell*) is a | |||||
Python package that provides an easy-to-use and outrageously powerful template | |||||
parser for MediaWiki_ wikicode. | |||||
**mwparserfromhell** (the *MediaWiki Parser from Hell*) is a Python package | |||||
that provides an easy-to-use and outrageously powerful parser for MediaWiki_ | |||||
wikicode. | |||||
Coded by Earwig_ and named by `Σ`_. | |||||
Developed by Earwig_ and named by `Σ`_. | |||||
Installation | Installation | ||||
------------ | ------------ | ||||
The easiest way to install the parser is through the `Python Package Index`_, | The easiest way to install the parser is through the `Python Package Index`_, | ||||
so you can install the latest release with ``pip install | |||||
mwtemplateparserfromhell`` (`get pip`_). Alternatively, get the latest | |||||
development version:: | |||||
so you can install the latest release with ``pip install mwparserfromhell`` | |||||
(`get pip`_). Alternatively, get the latest development version:: | |||||
git clone git://github.com/earwig/mwtemplateparserfromhell.git mwtemplateparserfromhell | |||||
cd mwtemplateparserfromhell | |||||
git clone git://github.com/earwig/mwparserfromhell.git mwparserfromhell | |||||
cd mwparserfromhell | |||||
python setup.py install | python setup.py install | ||||
You can run the comprehensive unit testing suite with ``python setup.py test``. | You can run the comprehensive unit testing suite with ``python setup.py test``. | ||||
@@ -26,25 +25,28 @@ Usage | |||||
Normal usage is rather straightforward (where ``text`` is page text):: | Normal usage is rather straightforward (where ``text`` is page text):: | ||||
>>> import mwtemplateparserfromhell | |||||
>>> parser = mwtemplateparserfromhell.Parser() | |||||
>>> import mwparserfromhell | |||||
>>> parser = mwparserfromhell.Parser() | |||||
>>> templates = parser.parse(text) | >>> templates = parser.parse(text) | ||||
``templates`` is a list of ``mwtemplateparserfromhell.Template`` objects, which | |||||
contain a ``name`` attribute, a ``params`` attribute, and a ``get()`` method. | |||||
For example:: | |||||
``templates`` is a list of ``mwparserfromhell.Template`` objects, which contain | |||||
a ``name`` attribute, a ``params`` attribute, and a ``render()`` method.
Parameters can be retrieved by subscripting a template with a position or a
name. For example::
>>> templates = parser.parse("{{foo|bar|baz|eggs=spam}}") | >>> templates = parser.parse("{{foo|bar|baz|eggs=spam}}") | ||||
>>> print templates | >>> print templates | ||||
[Template(name="foo", params={"1": "bar", "2": "baz", "eggs": "spam"})] | [Template(name="foo", params={"1": "bar", "2": "baz", "eggs": "spam"})] | ||||
>>> print templates[0].name | |||||
>>> template = templates[0] | |||||
>>> print template.name | |||||
foo | foo | ||||
>>> print templates[0].params | |||||
>>> print template.params | |||||
['bar', 'baz'] | ['bar', 'baz'] | ||||
>>> print templates[0].get(0) | |||||
>>> print template[0] | |||||
bar | bar | ||||
>>> print templates[0].get("eggs") | |||||
>>> print template["eggs"] | |||||
spam | spam | ||||
>>> print template.render() | |||||
{{foo|bar|baz|eggs=spam}} | |||||
If the subscript is a number *n*, it'll return the *n*\ th parameter,
otherwise it will return the parameter with the given name. Unnamed parameters
@@ -66,19 +68,19 @@ By default, nested templates are supported like so:: | |||||
Integration | Integration | ||||
----------- | ----------- | ||||
``mwtemplateparserfromhell`` is used by and originally developed for | |||||
EarwigBot_; ``Page`` objects have a ``parse_templates`` method that essentially | |||||
calls ``Parser().parse()`` on ``page.get()``. | |||||
``mwparserfromhell`` is used by and originally developed for EarwigBot_; | |||||
``Page`` objects have a ``parse_templates`` method that essentially calls | |||||
``Parser().parse()`` on ``page.get()``. | |||||
If you're using PyWikipedia_, your code might look like this:: | If you're using PyWikipedia_, your code might look like this:: | ||||
import mwtemplateparserfromhell | |||||
import mwparserfromhell | |||||
import wikipedia as pywikibot | import wikipedia as pywikibot | ||||
def parse_templates(title): | def parse_templates(title): | ||||
site = pywikibot.get_site() | site = pywikibot.get_site() | ||||
page = pywikibot.Page(site, title) | page = pywikibot.Page(site, title) | ||||
text = page.get() | text = page.get() | ||||
parser = mwtemplateparserfromhell.Parser() | |||||
parser = mwparserfromhell.Parser() | |||||
return parser.parse(text) | return parser.parse(text) | ||||
If you're not using a library, you can parse templates in any page using the | If you're not using a library, you can parse templates in any page using the | ||||
@@ -86,13 +88,13 @@ following code (via the API_):: | |||||
import json | import json | ||||
import urllib | import urllib | ||||
import mwtemplateparserfromhell | |||||
import mwparserfromhell | |||||
API_URL = "http://en.wikipedia.org/w/api.php" | API_URL = "http://en.wikipedia.org/w/api.php" | ||||
def parse_templates(title): | def parse_templates(title): | ||||
raw = urllib.urlopen(API_URL, data).read() | raw = urllib.urlopen(API_URL, data).read() | ||||
res = json.loads(raw) | res = json.loads(raw) | ||||
text = res["query"]["pages"].values()[0]["revisions"][0]["*"] | text = res["query"]["pages"].values()[0]["revisions"][0]["*"] | ||||
parser = mwtemplateparserfromhell.Parser() | |||||
parser = mwparserfromhell.Parser() | |||||
return parser.parse(text) | return parser.parse(text) | ||||
.. _MediaWiki: http://mediawiki.org | .. _MediaWiki: http://mediawiki.org | ||||
@@ -21,11 +21,9 @@ | |||||
# SOFTWARE. | # SOFTWARE. | ||||
""" | """ | ||||
`mwtemplateparserfromhell | |||||
<https://github.com/earwig/mwtemplateparserfromhell>`_ (the MediaWiki Template | |||||
`mwparserfromhell <https://github.com/earwig/mwparserfromhell>`_ (the MediaWiki | |||||
Parser from Hell) is a Python package that provides an easy-to-use and | Parser from Hell) is a Python package that provides an easy-to-use and | ||||
outrageously powerful template parser for `MediaWiki <http://mediawiki.org>`_ | |||||
wikicode. | |||||
outrageously powerful parser for `MediaWiki <http://mediawiki.org>`_ wikicode. | |||||
""" | """ | ||||
__author__ = "Ben Kurtovic" | __author__ = "Ben Kurtovic" | ||||
@@ -34,5 +32,5 @@ __license__ = "MIT License" | |||||
__version__ = "0.1.dev" | __version__ = "0.1.dev" | ||||
__email__ = "ben.kurtovic@verizon.net" | __email__ = "ben.kurtovic@verizon.net" | ||||
from mwtemplateparserfromhell import parameter, parser, template | |||||
from mwtemplateparserfromhell.parser import Parser | |||||
from mwparserfromhell import parameter, parser, template | |||||
from mwparserfromhell.parser import Parser |
@@ -20,6 +20,8 @@ | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||
# SOFTWARE. | # SOFTWARE. | ||||
from mwparserfromhell.template import Template | |||||
__all__ = ["Parameter"] | __all__ = ["Parameter"] | ||||
class Parameter(object): | class Parameter(object): | ||||
@@ -49,13 +51,13 @@ class Parameter(object): | |||||
def __eq__(self, other):
    """Return whether this parameter equals *other*.

    Against another ``Parameter`` the name, the value, and the nested
    templates must all match.  Any other object is compared with
    ``self.value`` only.
    """
    if not isinstance(other, Parameter):
        return self.value == other
    return (self.name == other.name and self.value == other.value and
            self.templates == other.templates)
def __ne__(self, other):
    """Return whether this parameter differs from *other*.

    Against another ``Parameter`` a difference in name, value, or nested
    templates is enough.  Any other object is compared with ``self.value``
    only.
    """
    if not isinstance(other, Parameter):
        return self.value != other
    return (self.name != other.name or self.value != other.value or
            self.templates != other.templates)
@@ -79,9 +81,48 @@ class Parameter(object): | |||||
for char in self.value: | for char in self.value: | ||||
yield char | yield char | ||||
def __getitem__(self, key):
    """Delegate subscripting (``param[key]``) to the parameter's value."""
    value = self.value
    return value[key]
def __contains__(self, item):
    """Report whether *item* is found in the value or among the templates."""
    if item in self.value:
        return True
    return item in self.templates
def __add__(self, other):
    """Implement ``param + other``.

    A ``Parameter`` or ``Template`` operand produces a new ``Parameter`` that
    keeps this parameter's name, with the operand's text appended to the
    value and its template(s) appended to the template list.  Any other
    operand is simply added to ``self.value`` and that result is returned.
    """
    if isinstance(other, Parameter):
        extra_value, extra_templates = other.value, other.templates
    elif isinstance(other, Template):
        extra_value, extra_templates = other.render(), [other]
    else:
        return self.value + other
    return Parameter(self.name, self.value + extra_value,
                     self.templates + extra_templates)
def __radd__(self, other):
    """Implement ``other + param`` for a left operand that defers to us.

    A ``Template`` on the left yields a new ``Template`` with the same name
    and this parameter appended to its parameters.  Anything else is added
    to ``self.value``.
    """
    if not isinstance(other, Template):
        return other + self.value
    template_name = other.name
    extended = other.params + [self]
    return Template(template_name, extended)
def __iadd__(self, other):
    """Implement ``param += other`` by folding *other* into this parameter.

    A ``Parameter`` contributes its value and templates, a ``Template``
    contributes its rendered text and itself, and anything else is added to
    the value.  The same parameter object is returned in every case.
    """
    if isinstance(other, Parameter):
        self.value += other.value
        self.templates += other.templates
        return self
    if isinstance(other, Template):
        self.value += other.render()
        self.templates.append(other)
        return self
    self.value += other
    return self
def __mul__(self, other):
    """Implement ``param * other``: a new parameter with the same name whose
    value and template list are each multiplied by *other*."""
    name = self.name
    repeated_value = self.value * other
    repeated_templates = self.templates * other
    return Parameter(name, repeated_value, repeated_templates)
def __rmul__(self, other):
    """Implement ``other * param``: a new parameter with the same name whose
    value and template list are each multiplied by *other* from the left."""
    name = self.name
    repeated_value = other * self.value
    repeated_templates = other * self.templates
    return Parameter(name, repeated_value, repeated_templates)
def __imul__(self, other):
    """Implement ``param *= other``.

    Both the value and the template list are multiplied in place by *other*,
    and this same parameter object is returned.
    """
    # Augmented assignment is kept on purpose so the template list is
    # updated the same way the ``*=`` operator would update it.
    self.value *= other
    self.templates *= other
    return self
@property | @property | ||||
def name(self): | def name(self): | ||||
return self._name | return self._name |
@@ -20,8 +20,8 @@ | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||
# SOFTWARE. | # SOFTWARE. | ||||
from mwtemplateparserfromhell.parameter import Parameter | |||||
from mwtemplateparserfromhell.template import Template | |||||
from mwparserfromhell.parameter import Parameter | |||||
from mwparserfromhell.template import Template | |||||
__all__ = ["Parser"] | __all__ = ["Parser"] | ||||
@@ -41,13 +41,30 @@ class Template(object): | |||||
def __eq__(self, other):
    """Return whether this template equals *other*.

    Two ``Template`` objects are equal when their names and their parameter
    lists match.  Any other object is compared against this template's
    rendered wikicode.
    """
    if not isinstance(other, Template):
        return self.render() == other
    return self.name == other.name and self.params == other.params
def __ne__(self, other):
    """Return whether this template differs from *other*.

    Two ``Template`` objects differ when their names or their parameter
    lists differ.  Any other object is compared against this template's
    rendered wikicode.
    """
    if not isinstance(other, Template):
        return self.render() != other
    return self.name != other.name or self.params != other.params
def __getitem__(self, key):
    """Return a parameter by name or, failing that, by position.

    *key* is first tried as a key of ``self._params``.  When no such key
    exists it is used as an index into the stored values, in their stored
    order.

    Raises ``KeyError`` if *key* is neither a stored key nor usable as an
    index, and ``IndexError`` if it is an out-of-range position.
    """
    try:
        return self._params[key]
    except KeyError:  # Try lookup by order in param list
        # list() keeps this working where values() returns a view object
        # rather than a list.
        ordered = list(self._params.values())
        try:
            return ordered[key]
        except TypeError:
            # A missing *name* should surface as a key error, not as
            # "list indices must be integers".
            raise KeyError(key)
def __setitem__(self, key, value):
    """Store *value* under a name or at a position.

    A non-integer *key* is used directly as a key of ``self._params``.

    An integer *key* is treated as a position:

    * equal to the current number of parameters: *value* is appended,
      stored under the integer itself;
    * inside the current range: the entry at that position is replaced and
      keeps its existing key and its place in the ordering.

    Raises ``IndexError`` if an integer *key* is past the end (or is a
    negative index before the start).
    """
    if not isinstance(key, int):
        self._params[key] = value
        return

    count = len(self._params)
    if key > count:
        raise IndexError("Index is too large")
    if key == count:  # Simple addition to the end
        self._params[key] = value
        return

    # Assigning to a key that is already present leaves its position
    # untouched, so the mapping does not need to be rebuilt.
    existing_key = list(self._params.keys())[key]
    self._params[existing_key] = value
@property | @property | ||||
def name(self): | def name(self): | ||||
@@ -57,8 +74,11 @@ class Template(object): | |||||
def params(self): | def params(self): | ||||
return self._params.values() | return self._params.values() | ||||
def get(self, name): | |||||
try: | |||||
return self._params[name] | |||||
except KeyError: # Try lookup by order in param list | |||||
return self._params.values()[name] | |||||
def render(self):
    """Return the template as wikicode, e.g. ``{{foo|bar|eggs=spam}}``.

    A parameter is written without its name only when that name is exactly
    the number a bare value would be given at that point (``"1"``, ``"2"``,
    ... counting the bare values written so far) and its value contains no
    ``=``.  Every other parameter is written as ``name=value``, so an
    explicitly numbered parameter such as ``3=x`` keeps its number instead
    of being renumbered when the output is parsed again.
    """
    pieces = ["{{", self.name]
    position = 1  # number the next bare (unnamed) value would receive
    for param in self.params:
        if param.name == str(position) and "=" not in param.value:
            pieces.append("|" + param.value)
            position += 1
        else:
            pieces.append("|" + param.name + "=" + param.value)
    pieces.append("}}")
    return "".join(pieces)
@@ -23,23 +23,23 @@ | |||||
from setuptools import setup, find_packages | from setuptools import setup, find_packages | ||||
from mwtemplateparserfromhell import __version__ | |||||
from mwparserfromhell import __version__ | |||||
with open("README.rst") as fp: | with open("README.rst") as fp: | ||||
long_docs = fp.read() | long_docs = fp.read() | ||||
setup( | setup( | ||||
name = "mwtemplateparserfromhell", | |||||
name = "mwparserfromhell", | |||||
packages = find_packages(exclude=("tests",)), | packages = find_packages(exclude=("tests",)), | ||||
test_suite = "tests", | test_suite = "tests", | ||||
version = __version__, | version = __version__, | ||||
author = "Ben Kurtovic", | author = "Ben Kurtovic", | ||||
author_email = "ben.kurtovic@verizon.net", | author_email = "ben.kurtovic@verizon.net", | ||||
url = "https://github.com/earwig/mwtemplateparserfromhell", | |||||
description = "MWTemplateParserFromHell is a parser for MediaWiki templates.", | |||||
url = "https://github.com/earwig/mwparserfromhell", | |||||
description = "MWParserFromHell is a parser for MediaWiki wikicode.", | |||||
long_description = long_docs, | long_description = long_docs, | ||||
download_url = "https://github.com/earwig/mwtemplateparserfromhell/tarball/v{0}".format(__version__), | |||||
keywords = "earwig mwtemplateparserfromhell wikipedia wiki mediawiki template parsing", | |||||
download_url = "https://github.com/earwig/mwparserfromhell/tarball/v{0}".format(__version__), | |||||
keywords = "earwig mwparserfromhell wikipedia wiki mediawiki wikicode template parsing", | |||||
license = "MIT License", | license = "MIT License", | ||||
classifiers = [ | classifiers = [ | ||||
"Development Status :: 3 - Alpha", | "Development Status :: 3 - Alpha", | ||||
@@ -22,8 +22,8 @@ | |||||
import unittest | import unittest | ||||
from mwtemplateparserfromhell.parameter import Parameter | |||||
from mwtemplateparserfromhell.template import Template | |||||
from mwparserfromhell.parameter import Parameter | |||||
from mwparserfromhell.template import Template | |||||
class TestParameter(unittest.TestCase): | class TestParameter(unittest.TestCase): | ||||
def setUp(self): | def setUp(self): | ||||
@@ -93,7 +93,27 @@ class TestParameter(unittest.TestCase): | |||||
self.assertEquals(bool(param), bool(param.value)) | self.assertEquals(bool(param), bool(param.value)) | ||||
self.assertEquals(len(param), len(param.value)) | self.assertEquals(len(param), len(param.value)) | ||||
self.assertEquals(list(param), list(param.value)) | self.assertEquals(list(param), list(param.value)) | ||||
self.assertEquals(param[2], param.value[2]) | |||||
self.assertEquals(list(reversed(param)), | |||||
list(reversed(param.value))) | |||||
self.assertIs("bar" in param, "bar" in param.value) | self.assertIs("bar" in param, "bar" in param.value) | ||||
self.assertEquals(param + "test", param.value + "test") | |||||
self.assertEquals("test" + param, "test" + param.value) | |||||
# add param | |||||
# add template left | |||||
# add template right | |||||
self.assertEquals(param * 3, Parameter(param.name, param.value * 3, | |||||
param.templates * 3)) | |||||
self.assertEquals(3 * param, Parameter(param.name, 3 * param.value, | |||||
3 * param.templates)) | |||||
# add param inplace | |||||
# add template implace | |||||
# add str inplace | |||||
# multiply int inplace | |||||
self.assertIsInstance(param, Parameter) | |||||
self.assertIsInstance(param.value, str) | |||||
if __name__ == "__main__": | if __name__ == "__main__": | ||||
unittest.main(verbosity=2) | unittest.main(verbosity=2) |
@@ -22,9 +22,9 @@ | |||||
import unittest | import unittest | ||||
from mwtemplateparserfromhell.parameter import Parameter | |||||
from mwtemplateparserfromhell.parser import Parser | |||||
from mwtemplateparserfromhell.template import Template | |||||
from mwparserfromhell.parameter import Parameter | |||||
from mwparserfromhell.parser import Parser | |||||
from mwparserfromhell.template import Template | |||||
TESTS = [ | TESTS = [ | ||||
("", []), | ("", []), | ||||
@@ -37,7 +37,6 @@ TESTS = [ | |||||
("multiple {{-}} templates {{+}}!", [Template("-"), Template("+")]), | ("multiple {{-}} templates {{+}}!", [Template("-"), Template("+")]), | ||||
("{{{no templates here}}}", []), | ("{{{no templates here}}}", []), | ||||
("{ {{templates here}}}", [Template("templates here")]), | ("{ {{templates here}}}", [Template("templates here")]), | ||||
("{{{{I exist}} }}", [Template("I exist")]), | |||||
("{{{{I do not exist}}}}", []), | ("{{{{I do not exist}}}}", []), | ||||
("{{foo|bar|baz|eggs=spam}}", | ("{{foo|bar|baz|eggs=spam}}", | ||||
[Template("foo", [Parameter("1", "bar"), Parameter("2", "baz"), | [Template("foo", [Parameter("1", "bar"), Parameter("2", "baz"), | ||||
@@ -46,6 +45,12 @@ TESTS = [ | |||||
[Template("abc def", [Parameter("1", "ghi"), Parameter("jk", "lmno"), | [Template("abc def", [Parameter("1", "ghi"), Parameter("jk", "lmno"), | ||||
Parameter("2", "pqr"), Parameter("st", "uv"), | Parameter("2", "pqr"), Parameter("st", "uv"), | ||||
Parameter("3", "wx"), Parameter("4", "yz")])]), | Parameter("3", "wx"), Parameter("4", "yz")])]), | ||||
("{{this has a|{{template}}|inside of it}}", | |||||
[Template("this has a", [Parameter("1", "{{template}}", | |||||
[Template("template")]), | |||||
Parameter("2", "inside of it")])]), | |||||
("{{{{I exist}} }}", [Template("I exist", [] )]), | |||||
("{{}}") | |||||
] | ] | ||||
class TestParser(unittest.TestCase): | class TestParser(unittest.TestCase): | ||||
@@ -23,8 +23,8 @@ | |||||
from itertools import permutations | from itertools import permutations | ||||
import unittest | import unittest | ||||
from mwtemplateparserfromhell.parameter import Parameter | |||||
from mwtemplateparserfromhell.template import Template | |||||
from mwparserfromhell.parameter import Parameter | |||||
from mwparserfromhell.template import Template | |||||
class TestTemplate(unittest.TestCase): | class TestTemplate(unittest.TestCase): | ||||
def setUp(self): | def setUp(self): | ||||
@@ -57,14 +57,22 @@ class TestTemplate(unittest.TestCase): | |||||
Template(name=self.name, params=self.params)): | Template(name=self.name, params=self.params)): | ||||
self.assertEqual(template.params, self.params) | self.assertEqual(template.params, self.params) | ||||
def test_getitem(self):
    """Subscripting a template works both by position and by name."""
    template = Template(name=self.name, params=self.params)
    expected = [
        (0, self.bar), (1, self.baz), (2, self.eggs),
        ("1", self.bar), ("2", self.baz), ("eggs", self.eggs),
    ]
    for key, param in expected:
        # Identity, not equality: the very same Parameter object must come
        # back out.
        self.assertIs(template[key], param)
def test_render(self):
    """``render()`` gives the expected wikicode with and without params."""
    bare = Template(self.name)
    full = Template(self.name, self.params)
    self.assertEqual(bare.render(), "{{foo}}")
    self.assertEqual(full.render(), "{{foo|bar|baz|eggs=spam}}")
def test_repr(self): | def test_repr(self): | ||||
correct1= 'Template(name=foo, params={})' | correct1= 'Template(name=foo, params={})' | ||||