From 67f1762aa402a7dee1b96f80e8d9d2521fe8b069 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 23 Aug 2013 23:23:22 -0400 Subject: [PATCH] Doc updates, and allow passing a starting context to tokenize(). --- CHANGELOG | 2 +- docs/api/mwparserfromhell.nodes.rst | 8 ++++++++ docs/api/mwparserfromhell.rst | 6 +++--- docs/changelog.rst | 2 +- mwparserfromhell/__init__.py | 3 ++- mwparserfromhell/nodes/external_link.py | 2 +- mwparserfromhell/parser/__init__.py | 9 ++++----- mwparserfromhell/parser/tokenizer.c | 7 ++++--- mwparserfromhell/parser/tokenizer.py | 5 +++-- mwparserfromhell/utils.py | 16 +++++++++++----- tests/test_parser.py | 6 +++--- 11 files changed, 41 insertions(+), 25 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 84edc60..122247f 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -16,7 +16,7 @@ v0.3 (unreleased): - Renamed Template.has_param() to has() for consistency with Template's other methods; has_param() is now an alias. - The C tokenizer extension now works on Python 3 in addition to Python 2.7. -- Various fixes and cleanup. +- Various bugfixes, internal changes, and cleanup. v0.2 (released June 20, 2013): diff --git a/docs/api/mwparserfromhell.nodes.rst b/docs/api/mwparserfromhell.nodes.rst index a093c17..7043070 100644 --- a/docs/api/mwparserfromhell.nodes.rst +++ b/docs/api/mwparserfromhell.nodes.rst @@ -25,6 +25,14 @@ nodes Package :undoc-members: :show-inheritance: +:mod:`external_link` Module +--------------------------- + +.. automodule:: mwparserfromhell.nodes.external_link + :members: + :undoc-members: + :show-inheritance: + :mod:`heading` Module --------------------- diff --git a/docs/api/mwparserfromhell.rst b/docs/api/mwparserfromhell.rst index b682139..0da522e 100644 --- a/docs/api/mwparserfromhell.rst +++ b/docs/api/mwparserfromhell.rst @@ -30,10 +30,10 @@ mwparserfromhell Package :members: :undoc-members: -:mod:`tag_defs` Module ----------------------- +:mod:`definitions` Module +------------------------- -.. automodule:: mwparserfromhell.tag_defs +.. automodule:: mwparserfromhell.definitions :members: :mod:`utils` Module diff --git a/docs/changelog.rst b/docs/changelog.rst index 810f594..f43a3c9 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -26,7 +26,7 @@ Unreleased :py:meth:`~.Template.has` for consistency with :py:class:`~.Template`\ 's other methods; :py:meth:`~.has_param` is now an alias. - The C tokenizer extension now works on Python 3 in addition to Python 2.7. -- Various fixes and cleanup. +- Various bugfixes, internal changes, and cleanup. v0.2 ---- diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py index 738d4c2..74e1616 100644 --- a/mwparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -34,6 +34,7 @@ __license__ = "MIT License" __version__ = "0.3.dev" __email__ = "ben.kurtovic@verizon.net" -from . import compat, nodes, parser, smart_list, string_mixin, utils, wikicode +from . import (compat, definitions, nodes, parser, smart_list, string_mixin, + utils, wikicode) parse = utils.parse_anything diff --git a/mwparserfromhell/nodes/external_link.py b/mwparserfromhell/nodes/external_link.py index 2ee37f3..bf1c9b1 100644 --- a/mwparserfromhell/nodes/external_link.py +++ b/mwparserfromhell/nodes/external_link.py @@ -70,7 +70,7 @@ class ExternalLink(Node): @property def url(self): - """The url of the link target, as a :py:class:`~.Wikicode` object.""" + """The URL of the link target, as a :py:class:`~.Wikicode` object.""" return self._url @property diff --git a/mwparserfromhell/parser/__init__.py b/mwparserfromhell/parser/__init__.py index 1fb95b5..22c3dc2 100644 --- a/mwparserfromhell/parser/__init__.py +++ b/mwparserfromhell/parser/__init__.py @@ -46,16 +46,15 @@ class Parser(object): :py:class:`~.Node`\ s by the :py:class:`~.Builder`. """ - def __init__(self, text): - self.text = text + def __init__(self): if use_c and CTokenizer: self._tokenizer = CTokenizer() else: self._tokenizer = Tokenizer() self._builder = Builder() - def parse(self): - """Return a string as a parsed :py:class:`~.Wikicode` object tree.""" - tokens = self._tokenizer.tokenize(self.text) + def parse(self, text, context=0): + """Parse *text*, returning a :py:class:`~.Wikicode` object tree.""" + tokens = self._tokenizer.tokenize(text, context) code = self._builder.build(tokens) return code diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 3dca5c2..af6bf3b 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -2667,8 +2667,9 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push) static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args) { PyObject *text, *temp; + int context = 0; - if (PyArg_ParseTuple(args, "U", &text)) { + if (PyArg_ParseTuple(args, "U|i", &text, &context)) { Py_XDECREF(self->text); self->text = PySequence_Fast(text, "expected a sequence"); } @@ -2677,7 +2678,7 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args) Py_ssize_t size; /* Failed to parse a Unicode object; try a string instead. */ PyErr_Clear(); - if (!PyArg_ParseTuple(args, "s#", &encoded, &size)) + if (!PyArg_ParseTuple(args, "s#|i", &encoded, &size, &context)) return NULL; temp = PyUnicode_FromStringAndSize(encoded, size); if (!text) @@ -2689,7 +2690,7 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args) } self->head = self->global = self->depth = self->cycles = 0; self->length = PyList_GET_SIZE(self->text); - return Tokenizer_parse(self, 0, 1); + return Tokenizer_parse(self, context, 1); } static int load_entitydefs(void) diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 2c8d6d7..6ab549a 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1125,8 +1125,9 @@ class Tokenizer(object): self._emit_text(this) self._head += 1 - def tokenize(self, text): + def tokenize(self, text, context=0): """Build a list of tokens from a string of wikicode and return it.""" split = self.regex.split(text) self._text = [segment for segment in split if segment] - return self._parse() + self._head = self._global = self._depth = self._cycles = 0 + return self._parse(context) diff --git a/mwparserfromhell/utils.py b/mwparserfromhell/utils.py index 31e5ba0..758e751 100644 --- a/mwparserfromhell/utils.py +++ b/mwparserfromhell/utils.py @@ -33,7 +33,7 @@ from .smart_list import SmartList __all__ = ["parse_anything"] -def parse_anything(value): +def parse_anything(value, context=0): """Return a :py:class:`~.Wikicode` for *value*, allowing multiple types. This differs from :py:meth:`.Parser.parse` in that we accept more than just @@ -44,6 +44,12 @@ def parse_anything(value): on-the-fly by various methods of :py:class:`~.Wikicode` and others like :py:class:`~.Template`, such as :py:meth:`wikicode.insert() <.Wikicode.insert>` or setting :py:meth:`template.name <.Template.name>`. + + If given, *context* will be passed as a starting context to the parser. + This is helpful when this function is used inside node attribute setters. + For example, :py:class:`~.ExternalLink`\ 's :py:attr:`~.ExternalLink.url` + setter sets *context* to :py:mod:`contexts.EXT_LINK_URI <.contexts>` to + prevent the URL itself from becoming an :py:class:`~.ExternalLink`. """ from .parser import Parser from .wikicode import Wikicode @@ -53,17 +59,17 @@ def parse_anything(value): elif isinstance(value, Node): return Wikicode(SmartList([value])) elif isinstance(value, str): - return Parser(value).parse() + return Parser().parse(value, context) elif isinstance(value, bytes): - return Parser(value.decode("utf8")).parse() + return Parser().parse(value.decode("utf8"), context) elif isinstance(value, int): - return Parser(str(value)).parse() + return Parser().parse(str(value), context) elif value is None: return Wikicode(SmartList()) try: nodelist = SmartList() for item in value: - nodelist += parse_anything(item).nodes + nodelist += parse_anything(item, context).nodes except TypeError: error = "Needs string, Node, Wikicode, int, None, or iterable of these, but got {0}: {1}" raise ValueError(error.format(type(value).__name__, value)) diff --git a/tests/test_parser.py b/tests/test_parser.py index ec5f065..8760c0e 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -36,9 +36,9 @@ class TestParser(TreeEqualityTestCase): def test_use_c(self): """make sure the correct tokenizer is used""" if parser.use_c: - self.assertTrue(parser.Parser(None)._tokenizer.USES_C) + self.assertTrue(parser.Parser()._tokenizer.USES_C) parser.use_c = False - self.assertFalse(parser.Parser(None)._tokenizer.USES_C) + self.assertFalse(parser.Parser()._tokenizer.USES_C) def test_parsing(self): """integration test for parsing overall""" @@ -59,7 +59,7 @@ class TestParser(TreeEqualityTestCase): ])) ]) ]) - actual = parser.Parser(text).parse() + actual = parser.Parser().parse(text) self.assertWikicodeEqual(expected, actual) if __name__ == "__main__":