瀏覽代碼

Doc updates, and allow passing a starting context to tokenize().

tags/v0.3
Ben Kurtovic 11 年之前
父節點
當前提交
67f1762aa4
共有 11 個文件被更改，包括 41 次插入和 24 次刪除
  1. +1
    -1
      CHANGELOG
  2. +8
    -0
      docs/api/mwparserfromhell.nodes.rst
  3. +3
    -2
      docs/api/mwparserfromhell.rst
  4. +1
    -1
      docs/changelog.rst
  5. +2
    -1
      mwparserfromhell/__init__.py
  6. +1
    -1
      mwparserfromhell/nodes/external_link.py
  7. +4
    -5
      mwparserfromhell/parser/__init__.py
  8. +4
    -3
      mwparserfromhell/parser/tokenizer.c
  9. +3
    -2
      mwparserfromhell/parser/tokenizer.py
  10. +11
    -5
      mwparserfromhell/utils.py
  11. +3
    -3
      tests/test_parser.py

+ 1
- 1
CHANGELOG 查看文件

@@ -16,7 +16,7 @@ v0.3 (unreleased):
- Renamed Template.has_param() to has() for consistency with Template's other
methods; has_param() is now an alias.
- The C tokenizer extension now works on Python 3 in addition to Python 2.7.
- Various fixes and cleanup.
- Various bugfixes, internal changes, and cleanup.

v0.2 (released June 20, 2013):



+ 8
- 0
docs/api/mwparserfromhell.nodes.rst 查看文件

@@ -25,6 +25,14 @@ nodes Package
:undoc-members:
:show-inheritance:

:mod:`external_link` Module
---------------------------

.. automodule:: mwparserfromhell.nodes.external_link
:members:
:undoc-members:
:show-inheritance:

:mod:`heading` Module
---------------------



+ 3
- 2
docs/api/mwparserfromhell.rst 查看文件

@@ -30,10 +30,10 @@ mwparserfromhell Package
:members:
:undoc-members:

:mod:`tag_defs` Module
:mod:`definitions` Module
-------------------------

.. automodule:: mwparserfromhell.tag_defs
.. automodule:: mwparserfromhell.definitions
:members:

:mod:`utils` Module


+ 1
- 1
docs/changelog.rst 查看文件

@@ -26,7 +26,7 @@ Unreleased
:py:meth:`~.Template.has` for consistency with :py:class:`~.Template`\ 's
other methods; :py:meth:`~.has_param` is now an alias.
- The C tokenizer extension now works on Python 3 in addition to Python 2.7.
- Various fixes and cleanup.
- Various bugfixes, internal changes, and cleanup.

v0.2
----


+ 2
- 1
mwparserfromhell/__init__.py 查看文件

@@ -34,6 +34,7 @@ __license__ = "MIT License"
__version__ = "0.3.dev"
__email__ = "ben.kurtovic@verizon.net"

from . import compat, nodes, parser, smart_list, string_mixin, utils, wikicode
from . import (compat, definitions, nodes, parser, smart_list, string_mixin,
utils, wikicode)

parse = utils.parse_anything

+ 1
- 1
mwparserfromhell/nodes/external_link.py 查看文件

@@ -70,7 +70,7 @@ class ExternalLink(Node):

@property
def url(self):
"""The url of the link target, as a :py:class:`~.Wikicode` object."""
"""The URL of the link target, as a :py:class:`~.Wikicode` object."""
return self._url

@property


+ 4
- 5
mwparserfromhell/parser/__init__.py 查看文件

@@ -46,16 +46,15 @@ class Parser(object):
:py:class:`~.Node`\ s by the :py:class:`~.Builder`.
"""

def __init__(self, text):
self.text = text
def __init__(self):
if use_c and CTokenizer:
self._tokenizer = CTokenizer()
else:
self._tokenizer = Tokenizer()
self._builder = Builder()

def parse(self):
"""Return a string as a parsed :py:class:`~.Wikicode` object tree."""
tokens = self._tokenizer.tokenize(self.text)
def parse(self, text, context=0):
"""Parse *text*, returning a :py:class:`~.Wikicode` object tree."""
tokens = self._tokenizer.tokenize(text, context)
code = self._builder.build(tokens)
return code

+ 4
- 3
mwparserfromhell/parser/tokenizer.c 查看文件

@@ -2667,8 +2667,9 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push)
static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args)
{
PyObject *text, *temp;
int context = 0;

if (PyArg_ParseTuple(args, "U", &text)) {
if (PyArg_ParseTuple(args, "U|i", &text, &context)) {
Py_XDECREF(self->text);
self->text = PySequence_Fast(text, "expected a sequence");
}
@@ -2677,7 +2678,7 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args)
Py_ssize_t size;
/* Failed to parse a Unicode object; try a string instead. */
PyErr_Clear();
if (!PyArg_ParseTuple(args, "s#", &encoded, &size))
if (!PyArg_ParseTuple(args, "s#|i", &encoded, &size, &context))
return NULL;
temp = PyUnicode_FromStringAndSize(encoded, size);
if (!text)
@@ -2689,7 +2690,7 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args)
}
self->head = self->global = self->depth = self->cycles = 0;
self->length = PyList_GET_SIZE(self->text);
return Tokenizer_parse(self, 0, 1);
return Tokenizer_parse(self, context, 1);
}

static int load_entitydefs(void)


+ 3
- 2
mwparserfromhell/parser/tokenizer.py 查看文件

@@ -1125,8 +1125,9 @@ class Tokenizer(object):
self._emit_text(this)
self._head += 1

def tokenize(self, text):
def tokenize(self, text, context=0):
"""Build a list of tokens from a string of wikicode and return it."""
split = self.regex.split(text)
self._text = [segment for segment in split if segment]
return self._parse()
self._head = self._global = self._depth = self._cycles = 0
return self._parse(context)

+ 11
- 5
mwparserfromhell/utils.py 查看文件

@@ -33,7 +33,7 @@ from .smart_list import SmartList

__all__ = ["parse_anything"]

def parse_anything(value):
def parse_anything(value, context=0):
"""Return a :py:class:`~.Wikicode` for *value*, allowing multiple types.

This differs from :py:meth:`.Parser.parse` in that we accept more than just
@@ -44,6 +44,12 @@ def parse_anything(value):
on-the-fly by various methods of :py:class:`~.Wikicode` and others like
:py:class:`~.Template`, such as :py:meth:`wikicode.insert()
<.Wikicode.insert>` or setting :py:meth:`template.name <.Template.name>`.

If given, *context* will be passed as a starting context to the parser.
This is helpful when this function is used inside node attribute setters.
For example, :py:class:`~.ExternalLink`\ 's :py:attr:`~.ExternalLink.url`
setter sets *context* to :py:mod:`contexts.EXT_LINK_URI <.contexts>` to
prevent the URL itself from becoming an :py:class:`~.ExternalLink`.
"""
from .parser import Parser
from .wikicode import Wikicode
@@ -53,17 +59,17 @@ def parse_anything(value):
elif isinstance(value, Node):
return Wikicode(SmartList([value]))
elif isinstance(value, str):
return Parser(value).parse()
return Parser().parse(value, context)
elif isinstance(value, bytes):
return Parser(value.decode("utf8")).parse()
return Parser().parse(value.decode("utf8"), context)
elif isinstance(value, int):
return Parser(str(value)).parse()
return Parser().parse(str(value), context)
elif value is None:
return Wikicode(SmartList())
try:
nodelist = SmartList()
for item in value:
nodelist += parse_anything(item).nodes
nodelist += parse_anything(item, context).nodes
except TypeError:
error = "Needs string, Node, Wikicode, int, None, or iterable of these, but got {0}: {1}"
raise ValueError(error.format(type(value).__name__, value))


+ 3
- 3
tests/test_parser.py 查看文件

@@ -36,9 +36,9 @@ class TestParser(TreeEqualityTestCase):
def test_use_c(self):
"""make sure the correct tokenizer is used"""
if parser.use_c:
self.assertTrue(parser.Parser(None)._tokenizer.USES_C)
self.assertTrue(parser.Parser()._tokenizer.USES_C)
parser.use_c = False
self.assertFalse(parser.Parser(None)._tokenizer.USES_C)
self.assertFalse(parser.Parser()._tokenizer.USES_C)

def test_parsing(self):
"""integration test for parsing overall"""
@@ -59,7 +59,7 @@ class TestParser(TreeEqualityTestCase):
]))
])
])
actual = parser.Parser(text).parse()
actual = parser.Parser().parse(text)
self.assertWikicodeEqual(expected, actual)

if __name__ == "__main__":


Loading…
取消
儲存