Doc updates, and allow passing a starting context to tokenize().

11 years ago · 67f1762aa4
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -16,7 +16,7 @@ v0.3 (unreleased):
 - Renamed Template.has_param() to has() for consistency with Template's other
  methods; has_param() is now an alias.
 - The C tokenizer extension now works on Python 3 in addition to Python 2.7.
 - Various fixes and cleanup.
 - Various bugfixes, internal changes, and cleanup.
 v0.2 (released June 20, 2013):
--- a/docs/api/mwparserfromhell.nodes.rst
+++ b/docs/api/mwparserfromhell.nodes.rst
@@ -25,6 +25,14 @@ nodes Package
    :undoc-members:
    :show-inheritance:
 :mod:`external_link` Module
 ---------------------------
 .. automodule:: mwparserfromhell.nodes.external_link
    :members:
    :undoc-members:
    :show-inheritance:
 :mod:`heading` Module
 ---------------------
--- a/docs/api/mwparserfromhell.rst
+++ b/docs/api/mwparserfromhell.rst
@@ -30,10 +30,10 @@ mwparserfromhell Package
    :members:
    :undoc-members:
 :mod:`tag_defs` Module
 :mod:`definitions` Module
 -------------------------
 .. automodule:: mwparserfromhell.tag_defs
 .. automodule:: mwparserfromhell.definitions
    :members:
 :mod:`utils` Module
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -26,7 +26,7 @@ Unreleased
  :py:meth:`~.Template.has` for consistency with :py:class:`~.Template`\ 's
  other methods; :py:meth:`~.has_param` is now an alias.
 - The C tokenizer extension now works on Python 3 in addition to Python 2.7.
 - Various fixes and cleanup.
 - Various bugfixes, internal changes, and cleanup.
 v0.2
 ----
--- a/mwparserfromhell/__init__.py
+++ b/mwparserfromhell/__init__.py
@@ -34,6 +34,7 @@ __license__ = "MIT License"
 __version__ = "0.3.dev"
 __email__ = "ben.kurtovic@verizon.net"
 from . import compat, nodes, parser, smart_list, string_mixin, utils, wikicode
 from . import (compat, definitions, nodes, parser, smart_list, string_mixin,
               utils, wikicode)
 parse = utils.parse_anything
--- a/mwparserfromhell/nodes/external_link.py
+++ b/mwparserfromhell/nodes/external_link.py
@@ -70,7 +70,7 @@ class ExternalLink(Node):
    @property
    def url(self):
        """The url of the link target, as a :py:class:`~.Wikicode` object."""
        """The URL of the link target, as a :py:class:`~.Wikicode` object."""
        return self._url
    @property
--- a/mwparserfromhell/parser/__init__.py
+++ b/mwparserfromhell/parser/__init__.py
@@ -46,16 +46,15 @@ class Parser(object):
    :py:class:`~.Node`\ s by the :py:class:`~.Builder`.
    """
    def __init__(self, text):
        self.text = text
    def __init__(self):
        if use_c and CTokenizer:
            self._tokenizer = CTokenizer()
        else:
            self._tokenizer = Tokenizer()
        self._builder = Builder()
    def parse(self):
        """Return a string as a parsed :py:class:`~.Wikicode` object tree."""
        tokens = self._tokenizer.tokenize(self.text)
    def parse(self, text, context=0):
        """Parse *text*, returning a :py:class:`~.Wikicode` object tree."""
        tokens = self._tokenizer.tokenize(text, context)
        code = self._builder.build(tokens)
        return code
--- a/mwparserfromhell/parser/tokenizer.c
+++ b/mwparserfromhell/parser/tokenizer.c
@@ -2667,8 +2667,9 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push)
 static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args)
 {
    PyObject *text, *temp;
    int context = 0;
    if (PyArg_ParseTuple(args, "U", &text)) {
    if (PyArg_ParseTuple(args, "U|i", &text, &context)) {
        Py_XDECREF(self->text);
        self->text = PySequence_Fast(text, "expected a sequence");
    }
@@ -2677,7 +2678,7 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args)
        Py_ssize_t size;
        /* Failed to parse a Unicode object; try a string instead. */
        PyErr_Clear();
        if (!PyArg_ParseTuple(args, "s#", &encoded, &size))
        if (!PyArg_ParseTuple(args, "s#|i", &encoded, &size, &context))
            return NULL;
        temp = PyUnicode_FromStringAndSize(encoded, size);
        if (!text)
@@ -2689,7 +2690,7 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args)
    }
    self->head = self->global = self->depth = self->cycles = 0;
    self->length = PyList_GET_SIZE(self->text);
    return Tokenizer_parse(self, 0, 1);
    return Tokenizer_parse(self, context, 1);
 }
 static int load_entitydefs(void)
--- a/mwparserfromhell/parser/tokenizer.py
+++ b/mwparserfromhell/parser/tokenizer.py
@@ -1125,8 +1125,9 @@ class Tokenizer(object):
                self._emit_text(this)
            self._head += 1
    def tokenize(self, text):
    def tokenize(self, text, context=0):
        """Build a list of tokens from a string of wikicode and return it."""
        split = self.regex.split(text)
        self._text = [segment for segment in split if segment]
        return self._parse()
        self._head = self._global = self._depth = self._cycles = 0
        return self._parse(context)
--- a/mwparserfromhell/utils.py
+++ b/mwparserfromhell/utils.py
@@ -33,7 +33,7 @@ from .smart_list import SmartList
 __all__ = ["parse_anything"]
 def parse_anything(value):
 def parse_anything(value, context=0):
    """Return a :py:class:`~.Wikicode` for *value*, allowing multiple types.
    This differs from :py:meth:`.Parser.parse` in that we accept more than just
@@ -44,6 +44,12 @@ def parse_anything(value):
    on-the-fly by various methods of :py:class:`~.Wikicode` and others like
    :py:class:`~.Template`, such as :py:meth:`wikicode.insert()
    <.Wikicode.insert>` or setting :py:meth:`template.name <.Template.name>`.
    If given, *context* will be passed as a starting context to the parser.
    This is helpful when this function is used inside node attribute setters.
    For example, :py:class:`~.ExternalLink`\ 's :py:attr:`~.ExternalLink.url`
    setter sets *context* to :py:mod:`contexts.EXT_LINK_URI <.contexts>` to
    prevent the URL itself from becoming an :py:class:`~.ExternalLink`.
    """
    from .parser import Parser
    from .wikicode import Wikicode
@@ -53,17 +59,17 @@ def parse_anything(value):
    elif isinstance(value, Node):
        return Wikicode(SmartList([value]))
    elif isinstance(value, str):
        return Parser(value).parse()
        return Parser().parse(value, context)
    elif isinstance(value, bytes):
        return Parser(value.decode("utf8")).parse()
        return Parser().parse(value.decode("utf8"), context)
    elif isinstance(value, int):
        return Parser(str(value)).parse()
        return Parser().parse(str(value), context)
    elif value is None:
        return Wikicode(SmartList())
    try:
        nodelist = SmartList()
        for item in value:
            nodelist += parse_anything(item).nodes
            nodelist += parse_anything(item, context).nodes
    except TypeError:
        error = "Needs string, Node, Wikicode, int, None, or iterable of these, but got {0}: {1}"
        raise ValueError(error.format(type(value).__name__, value))
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -36,9 +36,9 @@ class TestParser(TreeEqualityTestCase):
    def test_use_c(self):
        """make sure the correct tokenizer is used"""
        if parser.use_c:
            self.assertTrue(parser.Parser(None)._tokenizer.USES_C)
            self.assertTrue(parser.Parser()._tokenizer.USES_C)
            parser.use_c = False
        self.assertFalse(parser.Parser(None)._tokenizer.USES_C)
        self.assertFalse(parser.Parser()._tokenizer.USES_C)
    def test_parsing(self):
        """integration test for parsing overall"""
@@ -59,7 +59,7 @@ class TestParser(TreeEqualityTestCase):
                ]))
            ])
        ])
        actual = parser.Parser(text).parse()
        actual = parser.Parser().parse(text)
        self.assertWikicodeEqual(expected, actual)
 if __name__ == "__main__":