Browse Source

mwtemplateparserfromhell -> mwparserfromhell; some additions; still incomplete

tags/v0.1
Ben Kurtovic 12 years ago
parent
commit
a880ff8bf2
9 changed files with 160 additions and 66 deletions
  1. +28
    -26
      README.rst
  2. +4
    -6
      mwparserfromhell/__init__.py
  3. +43
    -2
      mwparserfromhell/parameter.py
  4. +2
    -2
      mwparserfromhell/parser.py
  5. +29
    -9
      mwparserfromhell/template.py
  6. +6
    -6
      setup.py
  7. +22
    -2
      tests/test_parameter.py
  8. +9
    -4
      tests/test_parser.py
  9. +17
    -9
      tests/test_template.py

+ 28
- 26
README.rst View File

@@ -1,22 +1,21 @@
mwtemplateparserfromhell
mwparserfromhell
========================

**mwtemplateparserfromhell** (the *MediaWiki Template Parser from Hell*) is a
Python package that provides an easy-to-use and outrageously powerful template
parser for MediaWiki_ wikicode.
**mwparserfromhell** (the *MediaWiki Parser from Hell*) is a Python package
that provides an easy-to-use and outrageously powerful parser for MediaWiki_
wikicode.

Coded by Earwig_ and named by `Σ`_.
Developed by Earwig_ and named by `Σ`_.

Installation
------------

The easiest way to install the parser is through the `Python Package Index`_,
so you can install the latest release with ``pip install
mwtemplateparserfromhell`` (`get pip`_). Alternatively, get the latest
development version::
so you can install the latest release with ``pip install mwparserfromhell``
(`get pip`_). Alternatively, get the latest development version::

git clone git://github.com/earwig/mwtemplateparserfromhell.git mwtemplateparserfromhell
cd mwtemplateparserfromhell
git clone git://github.com/earwig/mwparserfromhell.git mwparserfromhell
cd mwparserfromhell
python setup.py install

You can run the comprehensive unit testing suite with ``python setup.py test``.
@@ -26,25 +25,28 @@ Usage

Normal usage is rather straightforward (where ``text`` is page text)::

>>> import mwtemplateparserfromhell
>>> parser = mwtemplateparserfromhell.Parser()
>>> import mwparserfromhell
>>> parser = mwparserfromhell.Parser()
>>> templates = parser.parse(text)

``templates`` is a list of ``mwtemplateparserfromhell.Template`` objects, which
contain a ``name`` attribute, a ``params`` attribute, and a ``get()`` method.
For example::
``templates`` is a list of ``mwparserfromhell.Template`` objects, which contain
a ``name`` attribute, a ``params`` attribute, and a ``render()`` method. Slices
are supported to get parameters. For example::

>>> templates = parser.parse("{{foo|bar|baz|eggs=spam}}")
>>> print templates
[Template(name="foo", params={"1": "bar", "2": "baz", "eggs": "spam"})]
>>> print templates[0].name
>>> template = templates[0]
>>> print template.name
foo
>>> print templates[0].params
>>> print template.params
['bar', 'baz']
>>> print templates[0].get(0)
>>> print template[0]
bar
>>> print templates[0].get("eggs")
>>> print template["eggs"]
spam
>>> print template.render()
{{foo|bar|baz|eggs=spam}}

If the key used to index a template is a number *n*, it'll return the *n*\ th
parameter, otherwise it will return the parameter with the given name. Unnamed parameters
@@ -66,19 +68,19 @@ By default, nested templates are supported like so::
Integration
-----------

``mwtemplateparserfromhell`` is used by and originally developed for
EarwigBot_; ``Page`` objects have a ``parse_templates`` method that essentially
calls ``Parser().parse()`` on ``page.get()``.
``mwparserfromhell`` is used by and originally developed for EarwigBot_;
``Page`` objects have a ``parse_templates`` method that essentially calls
``Parser().parse()`` on ``page.get()``.

If you're using PyWikipedia_, your code might look like this::

import mwtemplateparserfromhell
import mwparserfromhell
import wikipedia as pywikibot
def parse_templates(title):
site = pywikibot.get_site()
page = pywikibot.Page(site, title)
text = page.get()
parser = mwtemplateparserfromhell.Parser()
parser = mwparserfromhell.Parser()
return parser.parse(text)

If you're not using a library, you can parse templates in any page using the
@@ -86,13 +88,13 @@ following code (via the API_)::

import json
import urllib
import mwtemplateparserfromhell
import mwparserfromhell
API_URL = "http://en.wikipedia.org/w/api.php"
def parse_templates(title):
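data = {"action": "query", "prop": "revisions", "rvprop": "content",
"rvlimit": 1, "format": "json", "titles": title}
raw = urllib.urlopen(API_URL, urllib.urlencode(data)).read()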
res = json.loads(raw)
text = res["query"]["pages"].values()[0]["revisions"][0]["*"]
parser = mwtemplateparserfromhell.Parser()
parser = mwparserfromhell.Parser()
return parser.parse(text)

.. _MediaWiki: http://mediawiki.org


mwtemplateparserfromhell/__init__.py → mwparserfromhell/__init__.py View File

@@ -21,11 +21,9 @@
# SOFTWARE.

"""
`mwtemplateparserfromhell
<https://github.com/earwig/mwtemplateparserfromhell>`_ (the MediaWiki Template
`mwparserfromhell <https://github.com/earwig/mwparserfromhell>`_ (the MediaWiki
Parser from Hell) is a Python package that provides an easy-to-use and
outrageously powerful template parser for `MediaWiki <http://mediawiki.org>`_
wikicode.
outrageously powerful parser for `MediaWiki <http://mediawiki.org>`_ wikicode.
"""

__author__ = "Ben Kurtovic"
@@ -34,5 +32,5 @@ __license__ = "MIT License"
__version__ = "0.1.dev"
__email__ = "ben.kurtovic@verizon.net"

from mwtemplateparserfromhell import parameter, parser, template
from mwtemplateparserfromhell.parser import Parser
from mwparserfromhell import parameter, parser, template
from mwparserfromhell.parser import Parser

mwtemplateparserfromhell/parameter.py → mwparserfromhell/parameter.py View File

@@ -20,6 +20,8 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from mwparserfromhell.template import Template

__all__ = ["Parameter"]

class Parameter(object):
@@ -49,13 +51,13 @@ class Parameter(object):

def __eq__(self, other):
if isinstance(other, Parameter):
return (self.value == other.value and
return (self.name == other.name and self.value == other.value and
self.templates == other.templates)
return self.value == other

def __ne__(self, other):
if isinstance(other, Parameter):
return (self.value != other.value or
return (self.name != other.name or self.value != other.value or
self.templates != other.templates)
return self.value != other

@@ -79,9 +81,48 @@ class Parameter(object):
for char in self.value:
yield char

def __getitem__(self, key):
    """Index or slice the parameter by delegating to its ``value``.

    Whatever ``self.value[key]`` yields (or raises) is passed straight
    through to the caller.
    """
    text = self.value
    return text[key]

def __contains__(self, item):
    """Return True if *item* is found in the value or in the nested templates.

    The value is checked first. If the value's own containment test
    rejects the operand type with ``TypeError`` (as ``str`` does for a
    non-string operand such as a template object), the item cannot be part
    of the value, so only ``self.templates`` is consulted instead of
    letting the error escape.
    """
    try:
        if item in self.value:
            return True
    except TypeError:
        # The value cannot hold an operand of this type; it may still be
        # one of the nested templates, so fall through to that check.
        pass
    return item in self.templates

def __add__(self, other):
    """Implement ``self + other``.

    * Parameter operand: a new Parameter carrying this parameter's name,
      the two values concatenated, and the two template lists concatenated.
    * Template operand: a new Parameter whose value has the template's
      rendered text appended and whose template list gains the template.
    * Anything else: the plain result of ``self.value + other`` (not a
      Parameter).
    """
    if isinstance(other, Parameter):
        extra_text, extra_templates = other.value, other.templates
    elif isinstance(other, Template):
        extra_text, extra_templates = other.render(), [other]
    else:
        return self.value + other
    return Parameter(self.name, self.value + extra_text,
                     self.templates + extra_templates)

def __radd__(self, other):
    """Implement ``other + self`` when *other* does not handle the addition.

    For a Template on the left, build a new Template with the same name
    whose parameter list is the old one followed by this parameter. For
    any other operand, return ``other + self.value``.
    """
    if not isinstance(other, Template):
        return other + self.value
    extended = other.params + [self]
    return Template(other.name, extended)

def __iadd__(self, other):
    """Implement ``self += other`` by mutating this parameter in place.

    * Parameter operand: append its value and extend the template list with
      its templates.
    * Template operand: append its rendered text and record the template.
    * Anything else: append it to the value only.

    Always returns ``self``.
    """
    if isinstance(other, Parameter):
        self.value += other.value
        self.templates += other.templates
        return self
    if isinstance(other, Template):
        self.value += other.render()
        self.templates.append(other)
        return self
    self.value += other
    return self

def __mul__(self, other):
    """Implement ``self * other``.

    Returns a new Parameter with the same name whose value and template
    list are each multiplied by *other*.
    """
    scaled_value = self.value * other
    scaled_templates = self.templates * other
    return Parameter(self.name, scaled_value, scaled_templates)

def __rmul__(self, other):
    """Implement ``other * self`` (reflected multiplication).

    Returns a new Parameter with the same name whose value and template
    list are each multiplied by *other*, with *other* as the left operand.
    """
    scaled_value = other * self.value
    scaled_templates = other * self.templates
    return Parameter(self.name, scaled_value, scaled_templates)

def __imul__(self, other):
    """Implement ``self *= other``.

    Applies ``*=`` to the value first and then to the template list, and
    returns ``self`` so the name stays bound to this same object.
    """
    self.value *= other  # value first, matching the established order
    self.templates *= other
    return self

@property
def name(self):
return self._name

mwtemplateparserfromhell/parser.py → mwparserfromhell/parser.py View File

@@ -20,8 +20,8 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from mwtemplateparserfromhell.parameter import Parameter
from mwtemplateparserfromhell.template import Template
from mwparserfromhell.parameter import Parameter
from mwparserfromhell.template import Template

__all__ = ["Parser"]


mwtemplateparserfromhell/template.py → mwparserfromhell/template.py View File

@@ -41,13 +41,30 @@ class Template(object):

def __eq__(self, other):
if isinstance(other, Template):
return self.name == other.name and self._params == other._params
return False
return self.name == other.name and self.params == other.params
return self.render() == other

def __ne__(self, other):
if isinstance(other, Template):
return self.name != other.name or self._params != other._params
return True
return self.name != other.name or self.params != other.params
return self.render() != other

def __getitem__(self, key):
    """Return a parameter by name, or by position (index or slice).

    *key* is first tried as a key of the underlying parameter mapping. If
    that lookup fails, *key* is applied to the sequence of parameter values
    in mapping order, so integers and slices select by position.

    Raises whatever the positional lookup raises (e.g. ``IndexError`` for
    an out-of-range integer, ``TypeError`` for an unusable key type).
    """
    try:
        return self._params[key]
    except (KeyError, TypeError):
        # KeyError: no parameter stored under that key.
        # TypeError: unhashable key -- slices are unhashable before
        # Python 3.12, so they land here instead of raising KeyError.
        # list() keeps this working where values() is a non-indexable view.
        return list(self._params.values())[key]

def __setitem__(self, key, value):
    """Store *value* under a parameter name or at a parameter position.

    * Non-integer *key*: stored directly in the parameter mapping.
    * Integer *key* equal to the current parameter count: appended, stored
      under the integer itself.
    * Integer *key* below the count: replaces the parameter currently at
      that position. The entry keeps its existing mapping key and its place
      in the ordering.
      NOTE(review): if *value* carries a different name than the slot it
      replaces, the mapping key is not updated -- confirm this is wanted.

    Raises ``IndexError`` if an integer *key* is larger than the parameter
    count (or too negative to address an existing position).
    """
    if not isinstance(key, int):
        self._params[key] = value
        return

    count = len(self._params)
    if key > count:
        raise IndexError("Index is too large")
    if key == count:  # Simple addition to the end
        self._params[key] = value
        return

    # Re-assigning an existing key keeps its position in an ordered
    # mapping, so no rebuild is required: translate position -> key.
    existing_key = list(self._params)[key]
    self._params[existing_key] = value

@property
def name(self):
@@ -57,8 +74,11 @@ class Template(object):
def params(self):
return self._params.values()

def get(self, name):
try:
return self._params[name]
except KeyError: # Try lookup by order in param list
return self._params.values()[name]
def render(self):
    """Return the template as wikicode text: ``{{name|param|key=value}}``.

    Each parameter is emitted after a ``|``. A parameter is written as a
    bare value only when its name is all digits and its value contains no
    ``=``; otherwise it is written as ``name=value``.
    """
    pieces = ["{{", self.name]
    for param in self.params:
        pieces.append("|")
        positional = param.name.isdigit() and "=" not in param.value
        if not positional:
            # Named parameter, or a value containing "=" that would be
            # misread as name=value if written without an explicit name.
            pieces.append(param.name)
            pieces.append("=")
        pieces.append(param.value)
    pieces.append("}}")
    # Single join instead of repeated string concatenation in the loop.
    return "".join(pieces)

+ 6
- 6
setup.py View File

@@ -23,23 +23,23 @@

from setuptools import setup, find_packages

from mwtemplateparserfromhell import __version__
from mwparserfromhell import __version__

with open("README.rst") as fp:
long_docs = fp.read()

setup(
name = "mwtemplateparserfromhell",
name = "mwparserfromhell",
packages = find_packages(exclude=("tests",)),
test_suite = "tests",
version = __version__,
author = "Ben Kurtovic",
author_email = "ben.kurtovic@verizon.net",
url = "https://github.com/earwig/mwtemplateparserfromhell",
description = "MWTemplateParserFromHell is a parser for MediaWiki templates.",
url = "https://github.com/earwig/mwparserfromhell",
description = "MWParserFromHell is a parser for MediaWiki wikicode.",
long_description = long_docs,
download_url = "https://github.com/earwig/mwtemplateparserfromhell/tarball/v{0}".format(__version__),
keywords = "earwig mwtemplateparserfromhell wikipedia wiki mediawiki template parsing",
download_url = "https://github.com/earwig/mwparserfromhell/tarball/v{0}".format(__version__),
keywords = "earwig mwparserfromhell wikipedia wiki mediawiki wikicode template parsing",
license = "MIT License",
classifiers = [
"Development Status :: 3 - Alpha",


+ 22
- 2
tests/test_parameter.py View File

@@ -22,8 +22,8 @@

import unittest

from mwtemplateparserfromhell.parameter import Parameter
from mwtemplateparserfromhell.template import Template
from mwparserfromhell.parameter import Parameter
from mwparserfromhell.template import Template

class TestParameter(unittest.TestCase):
def setUp(self):
@@ -93,7 +93,27 @@ class TestParameter(unittest.TestCase):
self.assertEquals(bool(param), bool(param.value))
self.assertEquals(len(param), len(param.value))
self.assertEquals(list(param), list(param.value))
self.assertEquals(param[2], param.value[2])
self.assertEquals(list(reversed(param)),
list(reversed(param.value)))
self.assertIs("bar" in param, "bar" in param.value)
self.assertEquals(param + "test", param.value + "test")
self.assertEquals("test" + param, "test" + param.value)
# add param
# add template left
# add template right
self.assertEquals(param * 3, Parameter(param.name, param.value * 3,
param.templates * 3))
self.assertEquals(3 * param, Parameter(param.name, 3 * param.value,
3 * param.templates))
# add param inplace
# add template inplace
# add str inplace
# multiply int inplace
self.assertIsInstance(param, Parameter)
self.assertIsInstance(param.value, str)

if __name__ == "__main__":
unittest.main(verbosity=2)

+ 9
- 4
tests/test_parser.py View File

@@ -22,9 +22,9 @@

import unittest

from mwtemplateparserfromhell.parameter import Parameter
from mwtemplateparserfromhell.parser import Parser
from mwtemplateparserfromhell.template import Template
from mwparserfromhell.parameter import Parameter
from mwparserfromhell.parser import Parser
from mwparserfromhell.template import Template

TESTS = [
("", []),
@@ -37,7 +37,6 @@ TESTS = [
("multiple {{-}} templates {{+}}!", [Template("-"), Template("+")]),
("{{{no templates here}}}", []),
("{ {{templates here}}}", [Template("templates here")]),
("{{{{I exist}} }}", [Template("I exist")]),
("{{{{I do not exist}}}}", []),
("{{foo|bar|baz|eggs=spam}}",
[Template("foo", [Parameter("1", "bar"), Parameter("2", "baz"),
@@ -46,6 +45,12 @@ TESTS = [
[Template("abc def", [Parameter("1", "ghi"), Parameter("jk", "lmno"),
Parameter("2", "pqr"), Parameter("st", "uv"),
Parameter("3", "wx"), Parameter("4", "yz")])]),
("{{this has a|{{template}}|inside of it}}",
[Template("this has a", [Parameter("1", "{{template}}",
[Template("template")]),
Parameter("2", "inside of it")])]),
("{{{{I exist}} }}", [Template("I exist", [] )]),
("{{}}")
]

class TestParser(unittest.TestCase):


+ 17
- 9
tests/test_template.py View File

@@ -23,8 +23,8 @@
from itertools import permutations
import unittest

from mwtemplateparserfromhell.parameter import Parameter
from mwtemplateparserfromhell.template import Template
from mwparserfromhell.parameter import Parameter
from mwparserfromhell.template import Template

class TestTemplate(unittest.TestCase):
def setUp(self):
@@ -57,14 +57,22 @@ class TestTemplate(unittest.TestCase):
Template(name=self.name, params=self.params)):
self.assertEqual(template.params, self.params)

def test_get(self):
def test_getitem(self):
template = Template(name=self.name, params=self.params)
self.assertIs(template.get(0), self.bar)
self.assertIs(template.get(1), self.baz)
self.assertIs(template.get(2), self.eggs)
self.assertIs(template.get("1"), self.bar)
self.assertIs(template.get("2"), self.baz)
self.assertIs(template.get("eggs"), self.eggs)
self.assertIs(template[0], self.bar)
self.assertIs(template[1], self.baz)
self.assertIs(template[2], self.eggs)
self.assertIs(template["1"], self.bar)
self.assertIs(template["2"], self.baz)
self.assertIs(template["eggs"], self.eggs)

def test_render(self):
    """Check ``Template.render()`` output without and with parameters.

    Uses the ``self.name`` and ``self.params`` fixtures (presumably set up
    in ``setUp`` -- not visible here).
    """
    bare = Template(self.name)
    populated = Template(self.name, self.params)
    expectations = (
        (bare, "{{foo}}"),
        (populated, "{{foo|bar|baz|eggs=spam}}"),
    )
    for subject, expected in expectations:
        self.assertEqual(subject.render(), expected)

def test_repr(self):
correct1= 'Template(name=foo, params={})'


Loading…
Cancel
Save