diff --git a/README.rst b/README.rst index 25fdc64..66bc41a 100644 --- a/README.rst +++ b/README.rst @@ -14,7 +14,7 @@ The easiest way to install the parser is through the `Python Package Index`_, so you can install the latest release with ``pip install mwparserfromhell`` (`get pip`_). Alternatively, get the latest development version:: - git clone git://github.com/earwig/mwparserfromhell.git mwparserfromhell + git clone git://github.com/earwig/mwparserfromhell.git cd mwparserfromhell python setup.py install @@ -63,7 +63,7 @@ nested templates:: >>> print foo.get(1).value.filter_templates()[0].get(1).value template -Additionally, you can get include nested templates in ``filter_templates()`` by +Additionally, you can include nested templates in ``filter_templates()`` by passing ``recursive=True``:: >>> text = "{{foo|{{bar}}={{baz|{{spam}}}}}}" diff --git a/docs/index.rst b/docs/index.rst index 502f500..84b4c74 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,9 +1,9 @@ MWParserFromHell v0.1 Documentation =================================== -**mwparserfromhell** (the *MediaWiki Parser from Hell*) is a Python package -that provides an easy-to-use and outrageously powerful parser for MediaWiki_ -wikicode. It supports Python 2 and Python 3. +:py:mod:`mwparserfromhell` (the *MediaWiki Parser from Hell*) is a Python +package that provides an easy-to-use and outrageously powerful parser for +MediaWiki_ wikicode. It supports Python 2 and Python 3. Developed by Earwig_ with help from `Σ`_. @@ -11,12 +11,30 @@ Developed by Earwig_ with help from `Σ`_. .. _Earwig: http://en.wikipedia.org/wiki/User:The_Earwig .. _Σ: http://en.wikipedia.org/wiki/User:Σ +Installation +------------ + +The easiest way to install the parser is through the `Python Package Index`_, +so you can install the latest release with ``pip install mwparserfromhell`` +(`get pip`_). 
Alternatively, get the latest development version:: + + git clone git://github.com/earwig/mwparserfromhell.git + cd mwparserfromhell + python setup.py install + +You can run the comprehensive unit testing suite with ``python setup.py test``. + +.. _Python Package Index: http://pypi.python.org +.. _get pip: http://pypi.python.org/pypi/pip + Contents -------- .. toctree:: :maxdepth: 2 + usage + integration API Reference diff --git a/docs/integration.rst b/docs/integration.rst new file mode 100644 index 0000000..d0e54db --- /dev/null +++ b/docs/integration.rst @@ -0,0 +1,35 @@ +Integration +=========== + +:py:mod:`mwparserfromhell` is used by and originally developed for EarwigBot_; +:py:class:`~earwigbot.wiki.page.Page` objects have a +:py:meth:`~earwigbot.wiki.page.Page.parse` method that essentially calls +:py:func:`mwparserfromhell.parse()` on +:py:meth:`~earwigbot.wiki.page.Page.get`. + +If you're using PyWikipedia_, your code might look like this:: + + import mwparserfromhell + import wikipedia as pywikibot + def parse(title): + site = pywikibot.get_site() + page = pywikibot.Page(site, title) + text = page.get() + return mwparserfromhell.parse(text) + +If you're not using a library, you can parse templates in any page using the +following code (via the API_):: + + import json + import urllib + import mwparserfromhell + API_URL = "http://en.wikipedia.org/w/api.php" + def parse(title): + raw = urllib.urlopen(API_URL, data).read() + res = json.loads(raw) + text = res["query"]["pages"].values()[0]["revisions"][0]["*"] + return mwparserfromhell.parse(text) + +.. _EarwigBot: https://github.com/earwig/earwigbot +.. _PyWikipedia: http://pywikipediabot.sourceforge.net/ +.. 
_API: http://mediawiki.org/wiki/API diff --git a/docs/usage.rst b/docs/usage.rst new file mode 100644 index 0000000..c4472f9 --- /dev/null +++ b/docs/usage.rst @@ -0,0 +1,82 @@ +Usage +===== + +Normal usage is rather straightforward (where ``text`` is page text):: + + >>> import mwparserfromhell + >>> wikicode = mwparserfromhell.parse(text) + +``wikicode`` is a :py:class:`mwparserfromhell.Wikicode <.Wikicode>` object, +which acts like an ordinary ``unicode`` object (or ``str`` in Python 3) with +some extra methods. For example:: + + >>> text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?" + >>> wikicode = mwparserfromhell.parse(text) + >>> print wikicode + I has a template! {{foo|bar|baz|eggs=spam}} See it? + >>> templates = wikicode.filter_templates() + >>> print templates + ['{{foo|bar|baz|eggs=spam}}'] + >>> template = templates[0] + >>> print template.name + foo + >>> print template.params + ['bar', 'baz', 'eggs=spam'] + >>> print template.get(1).value + bar + >>> print template.get("eggs").value + spam + +Since every node you reach is also a :py:class:`~.Wikicode` object, it's +trivial to get nested templates:: + + >>> code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}") + >>> print code.filter_templates() + ['{{foo|this {{includes a|template}}}}'] + >>> foo = code.filter_templates()[0] + >>> print foo.get(1).value + this {{includes a|template}} + >>> print foo.get(1).value.filter_templates()[0] + {{includes a|template}} + >>> print foo.get(1).value.filter_templates()[0].get(1).value + template + +Additionally, you can include nested templates in :py:meth:`~.filter_templates` +by passing *recursive=True*:: + + >>> text = "{{foo|{{bar}}={{baz|{{spam}}}}}}" + >>> mwparserfromhell.parse(text).filter_templates(recursive=True) + ['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}'] + +Templates can be easily modified to add, remove, or alter params. 
+:py:class:`~.Wikicode` can also be treated like a list with +:py:meth:`~.Wikicode.append`, :py:meth:`~.Wikicode.insert`, +:py:meth:`~.Wikicode.remove`, :py:meth:`~.Wikicode.replace`, and more:: + + >>> text = "{{cleanup}} '''Foo''' is a [[bar]]. {{uncategorized}}" + >>> code = mwparserfromhell.parse(text) + >>> for template in code.filter_templates(): + ... if template.name == "cleanup" and not template.has_param("date"): + ... template.add("date", "July 2012") + ... + >>> print code + {{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{uncategorized}} + >>> code.replace("{{uncategorized}}", "{{bar-stub}}") + >>> print code + {{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}} + >>> print code.filter_templates() + ['{{cleanup|date=July 2012}}', '{{bar-stub}}'] + +You can then convert ``code`` back into a regular :py:class:`unicode` object +(for saving the page!) by calling :py:func:`unicode` on it:: + + >>> text = unicode(code) + >>> print text + {{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}} + >>> text == code + True + +(Likewise, use :py:func:`str(code) <str>` in Python 3.) + +For more tips, check out :py:class:`Wikicode's full method list <.Wikicode>` +and the :py:mod:`list of Nodes <.nodes>`. diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index cf752c4..c549209 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -66,7 +66,7 @@ class Tokenizer(object): @property def _textbuffer(self): - """Return the current textbuffer.""" + """The current textbuffer.""" return self._stacks[-1][2] @_textbuffer.setter