Browse Source

Doc updates, and allow passing a starting context to tokenize().

tags/v0.3
Ben Kurtovic 10 years ago
parent
commit
67f1762aa4
11 changed files with 41 additions and 24 deletions
  1. +1
    -1
      CHANGELOG
  2. +8
    -0
      docs/api/mwparserfromhell.nodes.rst
  3. +3
    -2
      docs/api/mwparserfromhell.rst
  4. +1
    -1
      docs/changelog.rst
  5. +2
    -1
      mwparserfromhell/__init__.py
  6. +1
    -1
      mwparserfromhell/nodes/external_link.py
  7. +4
    -5
      mwparserfromhell/parser/__init__.py
  8. +4
    -3
      mwparserfromhell/parser/tokenizer.c
  9. +3
    -2
      mwparserfromhell/parser/tokenizer.py
  10. +11
    -5
      mwparserfromhell/utils.py
  11. +3
    -3
      tests/test_parser.py

+ 1
- 1
CHANGELOG View File

@@ -16,7 +16,7 @@ v0.3 (unreleased):
- Renamed Template.has_param() to has() for consistency with Template's other
methods; has_param() is now an alias.
- The C tokenizer extension now works on Python 3 in addition to Python 2.7.
- Various fixes and cleanup.
- Various bugfixes, internal changes, and cleanup.


v0.2 (released June 20, 2013):




+ 8
- 0
docs/api/mwparserfromhell.nodes.rst View File

@@ -25,6 +25,14 @@ nodes Package
:undoc-members:
:show-inheritance:


:mod:`external_link` Module
---------------------------

.. automodule:: mwparserfromhell.nodes.external_link
:members:
:undoc-members:
:show-inheritance:

:mod:`heading` Module
---------------------




+ 3
- 2
docs/api/mwparserfromhell.rst View File

@@ -30,10 +30,10 @@ mwparserfromhell Package
:members:
:undoc-members:


:mod:`tag_defs` Module
:mod:`definitions` Module
-------------------------


.. automodule:: mwparserfromhell.tag_defs
.. automodule:: mwparserfromhell.definitions
:members:


:mod:`utils` Module


+ 1
- 1
docs/changelog.rst View File

@@ -26,7 +26,7 @@ Unreleased
:py:meth:`~.Template.has` for consistency with :py:class:`~.Template`\ 's
other methods; :py:meth:`~.has_param` is now an alias.
- The C tokenizer extension now works on Python 3 in addition to Python 2.7.
- Various fixes and cleanup.
- Various bugfixes, internal changes, and cleanup.


v0.2
----


+ 2
- 1
mwparserfromhell/__init__.py View File

@@ -34,6 +34,7 @@ __license__ = "MIT License"
__version__ = "0.3.dev" __version__ = "0.3.dev"
__email__ = "ben.kurtovic@verizon.net" __email__ = "ben.kurtovic@verizon.net"


from . import compat, nodes, parser, smart_list, string_mixin, utils, wikicode
from . import (compat, definitions, nodes, parser, smart_list, string_mixin,
utils, wikicode)


parse = utils.parse_anything parse = utils.parse_anything

+ 1
- 1
mwparserfromhell/nodes/external_link.py View File

@@ -70,7 +70,7 @@ class ExternalLink(Node):


@property @property
def url(self): def url(self):
"""The url of the link target, as a :py:class:`~.Wikicode` object."""
"""The URL of the link target, as a :py:class:`~.Wikicode` object."""
return self._url return self._url


@property @property


+ 4
- 5
mwparserfromhell/parser/__init__.py View File

@@ -46,16 +46,15 @@ class Parser(object):
:py:class:`~.Node`\ s by the :py:class:`~.Builder`. :py:class:`~.Node`\ s by the :py:class:`~.Builder`.
""" """


def __init__(self, text):
self.text = text
def __init__(self):
if use_c and CTokenizer: if use_c and CTokenizer:
self._tokenizer = CTokenizer() self._tokenizer = CTokenizer()
else: else:
self._tokenizer = Tokenizer() self._tokenizer = Tokenizer()
self._builder = Builder() self._builder = Builder()


def parse(self):
"""Return a string as a parsed :py:class:`~.Wikicode` object tree."""
tokens = self._tokenizer.tokenize(self.text)
def parse(self, text, context=0):
"""Parse *text*, returning a :py:class:`~.Wikicode` object tree."""
tokens = self._tokenizer.tokenize(text, context)
code = self._builder.build(tokens) code = self._builder.build(tokens)
return code return code

+ 4
- 3
mwparserfromhell/parser/tokenizer.c View File

@@ -2667,8 +2667,9 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push)
static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args) static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args)
{ {
PyObject *text, *temp; PyObject *text, *temp;
int context = 0;


if (PyArg_ParseTuple(args, "U", &text)) {
if (PyArg_ParseTuple(args, "U|i", &text, &context)) {
Py_XDECREF(self->text); Py_XDECREF(self->text);
self->text = PySequence_Fast(text, "expected a sequence"); self->text = PySequence_Fast(text, "expected a sequence");
} }
@@ -2677,7 +2678,7 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args)
Py_ssize_t size; Py_ssize_t size;
/* Failed to parse a Unicode object; try a string instead. */ /* Failed to parse a Unicode object; try a string instead. */
PyErr_Clear(); PyErr_Clear();
if (!PyArg_ParseTuple(args, "s#", &encoded, &size))
if (!PyArg_ParseTuple(args, "s#|i", &encoded, &size, &context))
return NULL; return NULL;
temp = PyUnicode_FromStringAndSize(encoded, size); temp = PyUnicode_FromStringAndSize(encoded, size);
if (!text) if (!text)
@@ -2689,7 +2690,7 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args)
} }
self->head = self->global = self->depth = self->cycles = 0; self->head = self->global = self->depth = self->cycles = 0;
self->length = PyList_GET_SIZE(self->text); self->length = PyList_GET_SIZE(self->text);
return Tokenizer_parse(self, 0, 1);
return Tokenizer_parse(self, context, 1);
} }


static int load_entitydefs(void) static int load_entitydefs(void)


+ 3
- 2
mwparserfromhell/parser/tokenizer.py View File

@@ -1125,8 +1125,9 @@ class Tokenizer(object):
self._emit_text(this) self._emit_text(this)
self._head += 1 self._head += 1


def tokenize(self, text):
def tokenize(self, text, context=0):
"""Build a list of tokens from a string of wikicode and return it.""" """Build a list of tokens from a string of wikicode and return it."""
split = self.regex.split(text) split = self.regex.split(text)
self._text = [segment for segment in split if segment] self._text = [segment for segment in split if segment]
return self._parse()
self._head = self._global = self._depth = self._cycles = 0
return self._parse(context)

+ 11
- 5
mwparserfromhell/utils.py View File

@@ -33,7 +33,7 @@ from .smart_list import SmartList


__all__ = ["parse_anything"] __all__ = ["parse_anything"]


def parse_anything(value):
def parse_anything(value, context=0):
"""Return a :py:class:`~.Wikicode` for *value*, allowing multiple types. """Return a :py:class:`~.Wikicode` for *value*, allowing multiple types.


This differs from :py:meth:`.Parser.parse` in that we accept more than just This differs from :py:meth:`.Parser.parse` in that we accept more than just
@@ -44,6 +44,12 @@ def parse_anything(value):
on-the-fly by various methods of :py:class:`~.Wikicode` and others like on-the-fly by various methods of :py:class:`~.Wikicode` and others like
:py:class:`~.Template`, such as :py:meth:`wikicode.insert() :py:class:`~.Template`, such as :py:meth:`wikicode.insert()
<.Wikicode.insert>` or setting :py:meth:`template.name <.Template.name>`. <.Wikicode.insert>` or setting :py:meth:`template.name <.Template.name>`.

If given, *context* will be passed as a starting context to the parser.
This is helpful when this function is used inside node attribute setters.
For example, :py:class:`~.ExternalLink`\ 's :py:attr:`~.ExternalLink.url`
setter sets *context* to :py:mod:`contexts.EXT_LINK_URI <.contexts>` to
prevent the URL itself from becoming an :py:class:`~.ExternalLink`.
""" """
from .parser import Parser from .parser import Parser
from .wikicode import Wikicode from .wikicode import Wikicode
@@ -53,17 +59,17 @@ def parse_anything(value):
elif isinstance(value, Node): elif isinstance(value, Node):
return Wikicode(SmartList([value])) return Wikicode(SmartList([value]))
elif isinstance(value, str): elif isinstance(value, str):
return Parser(value).parse()
return Parser().parse(value, context)
elif isinstance(value, bytes): elif isinstance(value, bytes):
return Parser(value.decode("utf8")).parse()
return Parser().parse(value.decode("utf8"), context)
elif isinstance(value, int): elif isinstance(value, int):
return Parser(str(value)).parse()
return Parser().parse(str(value), context)
elif value is None: elif value is None:
return Wikicode(SmartList()) return Wikicode(SmartList())
try: try:
nodelist = SmartList() nodelist = SmartList()
for item in value: for item in value:
nodelist += parse_anything(item).nodes
nodelist += parse_anything(item, context).nodes
except TypeError: except TypeError:
error = "Needs string, Node, Wikicode, int, None, or iterable of these, but got {0}: {1}" error = "Needs string, Node, Wikicode, int, None, or iterable of these, but got {0}: {1}"
raise ValueError(error.format(type(value).__name__, value)) raise ValueError(error.format(type(value).__name__, value))


+ 3
- 3
tests/test_parser.py View File

@@ -36,9 +36,9 @@ class TestParser(TreeEqualityTestCase):
def test_use_c(self): def test_use_c(self):
"""make sure the correct tokenizer is used""" """make sure the correct tokenizer is used"""
if parser.use_c: if parser.use_c:
self.assertTrue(parser.Parser(None)._tokenizer.USES_C)
self.assertTrue(parser.Parser()._tokenizer.USES_C)
parser.use_c = False parser.use_c = False
self.assertFalse(parser.Parser(None)._tokenizer.USES_C)
self.assertFalse(parser.Parser()._tokenizer.USES_C)


def test_parsing(self): def test_parsing(self):
"""integration test for parsing overall""" """integration test for parsing overall"""
@@ -59,7 +59,7 @@ class TestParser(TreeEqualityTestCase):
])) ]))
]) ])
]) ])
actual = parser.Parser(text).parse()
actual = parser.Parser().parse(text)
self.assertWikicodeEqual(expected, actual) self.assertWikicodeEqual(expected, actual)


if __name__ == "__main__": if __name__ == "__main__":


Loading…
Cancel
Save