@@ -16,7 +16,7 @@ v0.3 (unreleased):
 - Renamed Template.has_param() to has() for consistency with Template's other
   methods; has_param() is now an alias.
 - The C tokenizer extension now works on Python 3 in addition to Python 2.7.
-- Various fixes and cleanup.
+- Various bugfixes, internal changes, and cleanup.
 
 v0.2 (released June 20, 2013):
@@ -25,6 +25,14 @@ nodes Package
    :undoc-members:
    :show-inheritance:
 
+:mod:`external_link` Module
+---------------------------
+
+.. automodule:: mwparserfromhell.nodes.external_link
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 :mod:`heading` Module
 ---------------------
 
@@ -30,10 +30,10 @@ mwparserfromhell Package
    :members:
    :undoc-members:
 
-:mod:`tag_defs` Module
-----------------------
+:mod:`definitions` Module
+-------------------------
 
-.. automodule:: mwparserfromhell.tag_defs
+.. automodule:: mwparserfromhell.definitions
    :members:
 
 :mod:`utils` Module
@@ -26,7 +26,7 @@ Unreleased
   :py:meth:`~.Template.has` for consistency with :py:class:`~.Template`\ 's
   other methods; :py:meth:`~.has_param` is now an alias.
 - The C tokenizer extension now works on Python 3 in addition to Python 2.7.
-- Various fixes and cleanup.
+- Various bugfixes, internal changes, and cleanup.
 
 v0.2
 ----
@@ -34,6 +34,7 @@ __license__ = "MIT License"
 __version__ = "0.3.dev"
 __email__ = "ben.kurtovic@verizon.net"
 
-from . import compat, nodes, parser, smart_list, string_mixin, utils, wikicode
+from . import (compat, definitions, nodes, parser, smart_list, string_mixin,
+               utils, wikicode)
 
 parse = utils.parse_anything
@@ -70,7 +70,7 @@ class ExternalLink(Node):
 
     @property
     def url(self):
-        """The url of the link target, as a :py:class:`~.Wikicode` object."""
+        """The URL of the link target, as a :py:class:`~.Wikicode` object."""
         return self._url
 
     @property
@@ -46,16 +46,15 @@ class Parser(object):
     :py:class:`~.Node`\ s by the :py:class:`~.Builder`.
     """
 
-    def __init__(self, text):
-        self.text = text
+    def __init__(self):
         if use_c and CTokenizer:
             self._tokenizer = CTokenizer()
         else:
             self._tokenizer = Tokenizer()
         self._builder = Builder()
 
-    def parse(self):
-        """Return a string as a parsed :py:class:`~.Wikicode` object tree."""
-        tokens = self._tokenizer.tokenize(self.text)
+    def parse(self, text, context=0):
+        """Parse *text*, returning a :py:class:`~.Wikicode` object tree."""
+        tokens = self._tokenizer.tokenize(text, context)
         code = self._builder.build(tokens)
         return code
@@ -2667,8 +2667,9 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push)
 static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args)
 {
     PyObject *text, *temp;
+    int context = 0;
 
-    if (PyArg_ParseTuple(args, "U", &text)) {
+    if (PyArg_ParseTuple(args, "U|i", &text, &context)) {
         Py_XDECREF(self->text);
         self->text = PySequence_Fast(text, "expected a sequence");
     }
@@ -2677,7 +2678,7 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args)
         Py_ssize_t size;
         /* Failed to parse a Unicode object; try a string instead. */
         PyErr_Clear();
-        if (!PyArg_ParseTuple(args, "s#", &encoded, &size))
+        if (!PyArg_ParseTuple(args, "s#|i", &encoded, &size, &context))
             return NULL;
         temp = PyUnicode_FromStringAndSize(encoded, size);
         if (!text)
@@ -2689,7 +2690,7 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args)
     }
     self->head = self->global = self->depth = self->cycles = 0;
     self->length = PyList_GET_SIZE(self->text);
-    return Tokenizer_parse(self, 0, 1);
+    return Tokenizer_parse(self, context, 1);
 }
 
 static int load_entitydefs(void)
@@ -1125,8 +1125,9 @@ class Tokenizer(object):
             self._emit_text(this)
             self._head += 1
 
-    def tokenize(self, text):
+    def tokenize(self, text, context=0):
         """Build a list of tokens from a string of wikicode and return it."""
         split = self.regex.split(text)
         self._text = [segment for segment in split if segment]
-        return self._parse()
+        self._head = self._global = self._depth = self._cycles = 0
+        return self._parse(context)
@@ -33,7 +33,7 @@ from .smart_list import SmartList
 
 __all__ = ["parse_anything"]
 
-def parse_anything(value):
+def parse_anything(value, context=0):
     """Return a :py:class:`~.Wikicode` for *value*, allowing multiple types.
 
     This differs from :py:meth:`.Parser.parse` in that we accept more than just
@@ -44,6 +44,12 @@ def parse_anything(value):
     on-the-fly by various methods of :py:class:`~.Wikicode` and others like
     :py:class:`~.Template`, such as :py:meth:`wikicode.insert()
     <.Wikicode.insert>` or setting :py:meth:`template.name <.Template.name>`.
+
+    If given, *context* will be passed as a starting context to the parser.
+    This is helpful when this function is used inside node attribute setters.
+    For example, :py:class:`~.ExternalLink`\ 's :py:attr:`~.ExternalLink.url`
+    setter sets *context* to :py:mod:`contexts.EXT_LINK_URI <.contexts>` to
+    prevent the URL itself from becoming an :py:class:`~.ExternalLink`.
     """
     from .parser import Parser
     from .wikicode import Wikicode
@@ -53,17 +59,17 @@ def parse_anything(value):
     elif isinstance(value, Node):
         return Wikicode(SmartList([value]))
     elif isinstance(value, str):
-        return Parser(value).parse()
+        return Parser().parse(value, context)
     elif isinstance(value, bytes):
-        return Parser(value.decode("utf8")).parse()
+        return Parser().parse(value.decode("utf8"), context)
     elif isinstance(value, int):
-        return Parser(str(value)).parse()
+        return Parser().parse(str(value), context)
     elif value is None:
         return Wikicode(SmartList())
     try:
         nodelist = SmartList()
         for item in value:
-            nodelist += parse_anything(item).nodes
+            nodelist += parse_anything(item, context).nodes
     except TypeError:
         error = "Needs string, Node, Wikicode, int, None, or iterable of these, but got {0}: {1}"
         raise ValueError(error.format(type(value).__name__, value))
@@ -36,9 +36,9 @@ class TestParser(TreeEqualityTestCase):
     def test_use_c(self):
         """make sure the correct tokenizer is used"""
         if parser.use_c:
-            self.assertTrue(parser.Parser(None)._tokenizer.USES_C)
+            self.assertTrue(parser.Parser()._tokenizer.USES_C)
             parser.use_c = False
-            self.assertFalse(parser.Parser(None)._tokenizer.USES_C)
+            self.assertFalse(parser.Parser()._tokenizer.USES_C)
 
     def test_parsing(self):
         """integration test for parsing overall"""
@@ -59,7 +59,7 @@ class TestParser(TreeEqualityTestCase):
                 ]))
             ])
         ])
-        actual = parser.Parser(text).parse()
+        actual = parser.Parser().parse(text)
         self.assertWikicodeEqual(expected, actual)
 
 if __name__ == "__main__":