From 03120bc97e01674fc00f815304e7b644a3681e32 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 21 Sep 2012 23:21:41 -0400 Subject: [PATCH 001/180] Version bump for 0.2.dev; fix a documentation bug. --- docs/conf.py | 4 ++-- docs/index.rst | 2 +- mwparserfromhell/__init__.py | 2 +- mwparserfromhell/parser/tokenizer.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index c537d37..6cc3664 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -48,9 +48,9 @@ copyright = u'2012 Ben Kurtovic' # built documents. # # The short X.Y version. -version = '0.1' +version = '0.2' # The full version, including alpha/beta/rc tags. -release = '0.1.1' +release = '0.2.dev' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/index.rst b/docs/index.rst index e198783..24f42f2 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,4 +1,4 @@ -MWParserFromHell v0.1 Documentation +MWParserFromHell v0.2 Documentation =================================== :py:mod:`mwparserfromhell` (the *MediaWiki Parser from Hell*) is a Python diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py index bdf5712..4f73a0e 100644 --- a/mwparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -31,7 +31,7 @@ from __future__ import unicode_literals __author__ = "Ben Kurtovic" __copyright__ = "Copyright (C) 2012 Ben Kurtovic" __license__ = "MIT License" -__version__ = "0.1.1" +__version__ = "0.2.dev" __email__ = "ben.kurtovic@verizon.net" from . import nodes, parser, smart_list, string_mixin, wikicode diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index a8ce88f..ca645b0 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -86,7 +86,7 @@ class Tokenizer(object): def _pop(self, keep_context=False): """Pop the current stack/context/textbuffer, returing the stack. - If *keep_context is ``True``, then we will replace the underlying + If *keep_context* is ``True``, then we will replace the underlying stack's context with the current stack's. """ self._push_textbuffer() From e0660f8bc31a00c3119d13d2d37bcf18042b3102 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 22 Sep 2012 22:47:05 -0400 Subject: [PATCH 002/180] Committing this C work for now. --- docs/conf.py | 5 +- mwparserfromhell/parser/builder.c | 24 +++ mwparserfromhell/parser/tokenizer.c | 322 ++++++++++++++++++++++++++++++++++++ setup.py | 9 +- 4 files changed, 357 insertions(+), 3 deletions(-) create mode 100644 mwparserfromhell/parser/builder.c create mode 100644 mwparserfromhell/parser/tokenizer.c diff --git a/docs/conf.py b/docs/conf.py index 6cc3664..cff089b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -17,6 +17,7 @@ import sys, os # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. sys.path.insert(0, os.path.abspath('..')) +import mwparserfromhell # -- General configuration ----------------------------------------------------- @@ -48,9 +49,9 @@ copyright = u'2012 Ben Kurtovic' # built documents. # # The short X.Y version. -version = '0.2' +version = ".".join(mwparserfromhell.__version__.split(".", 2)[:2]) # The full version, including alpha/beta/rc tags. -release = '0.2.dev' +release = mwparserfromhell.__version__ # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
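The conf.py hunk above stops hard-coding the Sphinx version strings: both are now derived from mwparserfromhell.__version__, so future bumps only need to touch mwparserfromhell/__init__.py. A minimal sketch of the derivation, assuming the project's "major.minor[.suffix]" version form:

# Sketch of the docs/conf.py derivation; "0.2.dev" stands in for the
# imported mwparserfromhell.__version__.
full = "0.2.dev"
short = ".".join(full.split(".", 2)[:2])  # keep at most "major.minor"
assert short == "0.2"     # what Sphinx uses as `version`
assert full == "0.2.dev"  # what Sphinx uses as `release`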
diff --git a/mwparserfromhell/parser/builder.c b/mwparserfromhell/parser/builder.c
new file mode 100644
index 0000000..7cbe236
--- /dev/null
+++ b/mwparserfromhell/parser/builder.c
@@ -0,0 +1,24 @@
+/*
+Builder for MWParserFromHell
+Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+
+#include <Python.h>
diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c
new file mode 100644
index 0000000..3fdc370
--- /dev/null
+++ b/mwparserfromhell/parser/tokenizer.c
@@ -0,0 +1,322 @@
+/*
+Tokenizer for MWParserFromHell
+Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/ + +#ifndef PY_SSIZE_T_CLEAN +#define PY_SSIZE_T_CLEAN +#endif + +#include +#include "structmember.h" + +static const Py_UNICODE* OUT_OF_BOUNDS = ""; +static const Py_UNICODE* MARKERS[] = {"{", "}", "[", "]", "<", ">", "|", "=", + "&", "#", "*", ";", ":", "/", "-", "!", + "\n", OUT_OF_BOUNDS}; + +static PyMethodDef +module_methods[] = { + {NULL} +}; + +typedef struct { + PyObject_HEAD + PyObject* text; /* text to tokenize */ + PyObject* stacks; /* token stacks */ + PyObject* topstack; /* topmost stack */ + Py_ssize_t head; /* current position in text */ + Py_ssize_t length; /* length of text */ + Py_ssize_t global; /* global context */ +} Tokenizer; + +static PyObject* +Tokenizer_new(PyTypeObject* type, PyObject* args, PyObject* kwds) +{ + Tokenizer *self; + + self = (Tokenizer*) type->tp_alloc(type, 0); + if (self != NULL) { + + self->text = Py_None; + Py_INCREF(Py_None); + + self->stacks = PyList_New(0); + if (self->stacks == NULL) { + Py_DECREF(self); + return NULL; + } + + self->head = 0; + self->length = 0; + self->global = 0; + } + + return (PyObject*) self; +} + +static void +Tokenizer_dealloc(Tokenizer* self) +{ + Py_XDECREF(self->text); + Py_XDECREF(self->stacks); + Py_XDECREF(self->topstack); + self->ob_type->tp_free((PyObject*) self); +} + +static int +Tokenizer_init(Tokenizer* self, PyObject* args, PyObject* kwds) +{ + static char* kwlist[] = {NULL}; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "", kwlist)) + return -1; + return 0; +} + +#define Tokenizer_STACK(self) PyList_GET_ITEM(self->topstack, 0) +#define Tokenizer_CONTEXT(self) PyList_GET_ITEM(self->topstack, 1) +#define Tokenizer_TEXTBUFFER(self) PyList_GET_ITEM(self->topstack, 2) + +static int +Tokenizer_set_context(Tokenizer* self, Py_ssize_t value) +{ + if (PyList_SetItem(self->topstack, 1, PyInt_FromSsize_t(value))) + return -1; + return 0; +} + +static int +Tokenizer_set_textbuffer(Tokenizer* self, PyObject* value) +{ + if (PyList_SetItem(self->topstack, 2, value)) + return -1; + return 0; +} + +/* + Add a new token stack, context, and textbuffer to the list. +*/ +static int +Tokenizer_push(Tokenizer* self, int context) +{ + PyObject* top = PyList_New(3); + PyList_SET_ITEM(top, 0, PyList_New(0)); + PyList_SET_ITEM(top, 1, PyInt_FromSsize_t(0)); + PyList_SET_ITEM(top, 2, PyList_New(0)); + + Py_XDECREF(self->topstack); + self->topstack = top; + + if (PyList_Append(self->stacks, top)) + return -1; + return 0; +} + +/* + Push the textbuffer onto the stack as a Text node and clear it. +*/ +static int +Tokenizer_push_textbuffer(Tokenizer* self) +{ + if (PyList_GET_SIZE(Tokenizer_TEXTBUFFER(self)) > 0) { + + PyObject* text; + // tokens.Text(text="".join(self._textbuffer)) + + if (PyList_Append(Tokenizer_STACK(self), text) + return -1; + + if (Tokenizer_set_textbuffer(self, PyList_New(0))) + return -1; + + return 0; + } +} + +/* + Pop the current stack/context/textbuffer, returing the stack. +*/ +static PyObject* +Tokenizer_pop(Tokenizer* self) +{ + if (Tokenizer_push_textbuffer(self)) + return NULL; + + self->stacks // POP!? +} + +/* + Pop the current stack/context/textbuffer, returing the stack. We will also + replace the underlying stack's context with the current stack's. +*/ +static PyObject* +Tokenizer_pop_keeping_context(Tokenizer* self) +{ + if (Tokenizer_push_textbuffer(self)) + return NULL; +} + +/* + Read the value at a relative point in the wikicode. 
+*/ +static Py_UNICODE* +Tokenizer_read(Tokenizer* self, Py_ssize_t delta) +{ + Py_ssize_t index = self->head + delta; + + if (index >= self->length) { + return OUT_OF_BOUNDS; + } + + PyObject* item = PySequence_Fast_GET_ITEM(self->text, index); + return PyUnicode_AS_UNICODE(item); +} + +/* + Parse the wikicode string, using *context* for when to stop. +*/ +static PyObject* +Tokenizer_parse(Tokenizer* self, int context) +{ + Py_UNICODE* this; + + Tokenizer_push(self, context); + + while (1) { + this = Tokenizer_read(self, 0); + if (this not in MARKERS) { + WRITE TEXT + } + if (this == OUT_OF_BOUNDS) { + return Tokenizer_push(self); + } + printf("%p %i %c\n", this, *this, *this); + self->head++; + } +} + +/* + Build a list of tokens from a string of wikicode and return it. +*/ +static PyObject* +Tokenizer_tokenize(Tokenizer* self, PyObject *args) +{ + PyObject* text; + + if (!PyArg_ParseTuple(args, "U", &text)) { + /* Failed to parse a Unicode object; try a string instead. */ + PyErr_Clear(); + const char* encoded; + Py_ssize_t size; + + if (!PyArg_ParseTuple(args, "s#", &encoded, &size)) { + return NULL; + } + + PyObject* temp; + temp = PyUnicode_FromStringAndSize(encoded, size); + if (text == NULL) + return NULL; + + Py_XDECREF(self->text); + text = PySequence_Fast(temp, "expected a sequence"); + Py_XDECREF(temp); + self->text = text; + } + else { + Py_XDECREF(self->text); + self->text = PySequence_Fast(text, "expected a sequence"); + } + + self->length = PySequence_Length(self->text); + + return Tokenizer_parse(self, 0); +} + +static PyMethodDef +Tokenizer_methods[] = { + {"tokenize", (PyCFunction) Tokenizer_tokenize, METH_VARARGS, + "Build a list of tokens from a string of wikicode and return it."}, + {NULL} +}; + +static PyMemberDef +Tokenizer_members[] = { + {NULL} +}; + +static PyTypeObject +TokenizerType = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + "_tokenizer.CTokenizer", /* tp_name */ + sizeof(Tokenizer), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor) Tokenizer_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + "Creates a list of tokens from a string of wikicode.", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + Tokenizer_methods, /* tp_methods */ + Tokenizer_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc) Tokenizer_init, /* tp_init */ + 0, /* tp_alloc */ + Tokenizer_new, /* tp_new */ +}; + +PyMODINIT_FUNC +init_tokenizer(void) +{ + PyObject* module; + + TokenizerType.tp_new = PyType_GenericNew; + if (PyType_Ready(&TokenizerType) < 0) + return; + + module = Py_InitModule("_tokenizer", module_methods); + + Py_INCREF(&TokenizerType); + PyModule_AddObject(module, "CTokenizer", (PyObject*) &TokenizerType); +} diff --git a/setup.py b/setup.py index 9faa56c..3664626 100644 --- a/setup.py +++ b/setup.py @@ -21,16 +21,23 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from setuptools import setup, find_packages +from setuptools import setup, find_packages, Extension from mwparserfromhell import __version__ with open("README.rst") as fp: long_docs = fp.read() +builder = Extension("mwparserfromhell.parser._builder", + sources = ["mwparserfromhell/parser/builder.c"]) + +tokenizer = Extension("mwparserfromhell.parser._tokenizer", + sources = ["mwparserfromhell/parser/tokenizer.c"]) + setup( name = "mwparserfromhell", packages = find_packages(exclude=("tests",)), + ext_modules = [builder, tokenizer], test_suite = "tests", version = __version__, author = "Ben Kurtovic", From 4cc4791d4871b833454ade8d9f52ee35e8bca742 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 23 Sep 2012 01:29:27 -0400 Subject: [PATCH 003/180] Adding a bunch more, and implementing Tokenizer_push_textbuffer. --- mwparserfromhell/parser/tokenizer.c | 116 ++++++++++++++++++++++++++++++------ setup.py | 2 +- 2 files changed, 98 insertions(+), 20 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 3fdc370..aec7b1d 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -28,10 +28,15 @@ SOFTWARE. #include #include "structmember.h" -static const Py_UNICODE* OUT_OF_BOUNDS = ""; -static const Py_UNICODE* MARKERS[] = {"{", "}", "[", "]", "<", ">", "|", "=", - "&", "#", "*", ";", ":", "/", "-", "!", - "\n", OUT_OF_BOUNDS}; +#define PU (Py_UNICODE*) +static const Py_UNICODE* OUT_OF_BOUNDS = PU""; +static const Py_UNICODE* MARKERS[] = {PU"{", PU"}", PU"[", PU"]", PU"<", PU">", + PU"|", PU"=", PU"&", PU"#", PU"*", PU";", + PU":", PU"/", PU"-", PU"!", PU"\n", PU""}; +#undef PU + +static PyObject* contexts; +static PyObject* tokens; static PyMethodDef module_methods[] = { @@ -60,7 +65,7 @@ Tokenizer_new(PyTypeObject* type, PyObject* args, PyObject* kwds) Py_INCREF(Py_None); self->stacks = PyList_New(0); - if (self->stacks == NULL) { + if (!self->stacks) { Py_DECREF(self); return NULL; } @@ -91,9 +96,9 @@ Tokenizer_init(Tokenizer* self, PyObject* args, PyObject* kwds) return 0; } -#define Tokenizer_STACK(self) PyList_GET_ITEM(self->topstack, 0) -#define Tokenizer_CONTEXT(self) PyList_GET_ITEM(self->topstack, 1) -#define Tokenizer_TEXTBUFFER(self) PyList_GET_ITEM(self->topstack, 2) +#define Tokenizer_STACK(self) PySequence_Fast_GET_ITEM(self->topstack, 0) +#define Tokenizer_CONTEXT(self) PySequence_Fast_GET_ITEM(self->topstack, 1) +#define Tokenizer_TEXTBUFFER(self) PySequence_Fast_GET_ITEM(self->topstack, 2) static int Tokenizer_set_context(Tokenizer* self, Py_ssize_t value) @@ -136,19 +141,65 @@ Tokenizer_push(Tokenizer* self, int context) static int Tokenizer_push_textbuffer(Tokenizer* self) { - if (PyList_GET_SIZE(Tokenizer_TEXTBUFFER(self)) > 0) { + if (PySequence_Fast_GET_SIZE(Tokenizer_TEXTBUFFER(self)) > 0) { + PyObject* sep = PyUnicode_FromString(""); + if (!sep) return -1; + PyObject* text = PyUnicode_Join(sep, Tokenizer_TEXTBUFFER(self)); + Py_DECREF(sep); + if (!text) return -1; + + PyObject* klass = PyObject_GetAttrString(tokens, "Text"); + if (!klass) return -1; + PyObject* args = PyTuple_New(0); + if (!args) return -1; + PyObject* kwargs = PyDict_New(); + if (!kwargs) return -1; + PyDict_SetItemString(kwargs, "text", text); + Py_DECREF(text); + + PyObject* token = PyInstance_New(klass, args, kwargs); + if (!token) { + Py_DECREF(klass); + Py_DECREF(args); + Py_DECREF(kwargs); + return -1; + } - PyObject* text; - // tokens.Text(text="".join(self._textbuffer)) + Py_DECREF(klass); + 
Py_DECREF(args); + Py_DECREF(kwargs); - if (PyList_Append(Tokenizer_STACK(self), text) + if (PyList_Append(Tokenizer_STACK(self), token)) { + Py_XDECREF(token); return -1; + } + + Py_XDECREF(token); if (Tokenizer_set_textbuffer(self, PyList_New(0))) return -1; + } + return 0; +} - return 0; +static int +Tokenizer_delete_top_of_stack(Tokenizer* self) +{ + if (PySequence_DelItem(self->stacks, -1)) + return -1; + Py_DECREF(self->topstack); + + Py_ssize_t size = PySequence_Fast_GET_SIZE(self->stacks); + if (size > 0) { + PyObject* top = PySequence_Fast_GET_ITEM(self->stacks, size - 1); + self->topstack = top; + Py_INCREF(top); + } + else { + self->topstack = NULL; } + + return 0; } /* @@ -160,7 +211,13 @@ Tokenizer_pop(Tokenizer* self) if (Tokenizer_push_textbuffer(self)) return NULL; - self->stacks // POP!? + PyObject* stack = Tokenizer_STACK(self); + Py_INCREF(stack); + + if (Tokenizer_delete_top_of_stack(self)) + return NULL; + + return stack; } /* @@ -172,6 +229,19 @@ Tokenizer_pop_keeping_context(Tokenizer* self) { if (Tokenizer_push_textbuffer(self)) return NULL; + + PyObject* stack = Tokenizer_STACK(self); + PyObject* context = Tokenizer_CONTEXT(self); + Py_INCREF(stack); + Py_INCREF(context); + + if (Tokenizer_delete_top_of_stack(self)) + return NULL; + + if (PyList_SetItem(self->topstack, 1, context)) + return NULL; + + return stack; } /* @@ -183,7 +253,7 @@ Tokenizer_read(Tokenizer* self, Py_ssize_t delta) Py_ssize_t index = self->head + delta; if (index >= self->length) { - return OUT_OF_BOUNDS; + return (Py_UNICODE*) OUT_OF_BOUNDS; } PyObject* item = PySequence_Fast_GET_ITEM(self->text, index); @@ -202,11 +272,11 @@ Tokenizer_parse(Tokenizer* self, int context) while (1) { this = Tokenizer_read(self, 0); - if (this not in MARKERS) { + /* if (this not in MARKERS) { WRITE TEXT - } + } */ if (this == OUT_OF_BOUNDS) { - return Tokenizer_push(self); + return Tokenizer_pop(self); } printf("%p %i %c\n", this, *this, *this); self->head++; @@ -233,7 +303,7 @@ Tokenizer_tokenize(Tokenizer* self, PyObject *args) PyObject* temp; temp = PyUnicode_FromStringAndSize(encoded, size); - if (text == NULL) + if (!text) return NULL; Py_XDECREF(self->text); @@ -319,4 +389,12 @@ init_tokenizer(void) Py_INCREF(&TokenizerType); PyModule_AddObject(module, "CTokenizer", (PyObject*) &TokenizerType); + + PyObject* globals = PyEval_GetGlobals(); + PyObject* locals = PyEval_GetLocals(); + PyObject* fromlist = PyList_New(0); + + contexts = PyImport_ImportModuleLevel("contexts", globals, locals, fromlist, 1); + tokens = PyImport_ImportModuleLevel("tokens", globals, locals, fromlist, 1); + Py_DECREF(fromlist); } diff --git a/setup.py b/setup.py index 3664626..e348ce5 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ builder = Extension("mwparserfromhell.parser._builder", sources = ["mwparserfromhell/parser/builder.c"]) tokenizer = Extension("mwparserfromhell.parser._tokenizer", - sources = ["mwparserfromhell/parser/tokenizer.c"]) + sources = ["mwparserfromhell/parser/tokenizer.c"]) setup( name = "mwparserfromhell", From 9c4aba13912c9d5b274a61a5f7c6d9945f72c0b6 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 23 Sep 2012 03:40:19 -0400 Subject: [PATCH 004/180] Adding a few more functions. 
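Among the new functions is Tokenizer_fail_route(), which pops the current stack and then longjmp()s back to the most recent setjmp(): the C stand-in for the BadRoute exception that the pure-Python tokenizer raises when a parse route turns out to be invalid. Roughly, the control flow being emulated looks like this Python sketch (names are illustrative, not the library's exact API):

class BadRoute(Exception):
    """Signals that the current tokenization route is invalid."""

class Tokenizer:
    def __init__(self, text):
        self.text = text
        self.head = 0     # current position in text
        self.stacks = []  # one stack/context/textbuffer triple per route

    def _fail_route(self):
        self.stacks.pop()  # discard the failed route's stack
        raise BadRoute()

    def _parse_template(self):
        reset = self.head  # remember where this route began
        self.stacks.append(([], 0, []))
        try:
            ...  # parse the template; may call _fail_route() deep inside
        except BadRoute:
            self.head = reset  # rewind so the caller can try another route
            raise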
--- mwparserfromhell/parser/tokenizer.c | 114 ++++++++++++++++++++++++++++++++---- 1 file changed, 101 insertions(+), 13 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index aec7b1d..99c9bfc 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -26,15 +26,20 @@ SOFTWARE. #endif #include +#include "setjmp.h" #include "structmember.h" +static PyObject* EMPTY; + #define PU (Py_UNICODE*) -static const Py_UNICODE* OUT_OF_BOUNDS = PU""; static const Py_UNICODE* MARKERS[] = {PU"{", PU"}", PU"[", PU"]", PU"<", PU">", PU"|", PU"=", PU"&", PU"#", PU"*", PU";", PU":", PU"/", PU"-", PU"!", PU"\n", PU""}; #undef PU +static jmp_buf exception_env; +static const int BAD_ROUTE = 1; + static PyObject* contexts; static PyObject* tokens; @@ -142,10 +147,7 @@ static int Tokenizer_push_textbuffer(Tokenizer* self) { if (PySequence_Fast_GET_SIZE(Tokenizer_TEXTBUFFER(self)) > 0) { - PyObject* sep = PyUnicode_FromString(""); - if (!sep) return -1; - PyObject* text = PyUnicode_Join(sep, Tokenizer_TEXTBUFFER(self)); - Py_DECREF(sep); + PyObject* text = PyUnicode_Join(EMPTY, Tokenizer_TEXTBUFFER(self)); if (!text) return -1; PyObject* klass = PyObject_GetAttrString(tokens, "Text"); @@ -174,7 +176,7 @@ Tokenizer_push_textbuffer(Tokenizer* self) return -1; } - Py_XDECREF(token); + Py_DECREF(token); if (Tokenizer_set_textbuffer(self, PyList_New(0))) return -1; @@ -245,19 +247,104 @@ Tokenizer_pop_keeping_context(Tokenizer* self) } /* + Fail the current tokenization route. + + Discards the current stack/context/textbuffer and "raises a BAD_ROUTE + exception", which is implemented using longjmp(). +*/ +static void +Tokenizer_fail_route(Tokenizer* self) +{ + Tokenizer_pop(self); + longjmp(exception_env, BAD_ROUTE); +} + +/* + Write a token to the end of the current token stack. +*/ +static int +Tokenizer_write(Tokenizer* self, PyObject* token) +{ + if (Tokenizer_push_textbuffer(self)) + return -1; + + if (PyList_Append(Tokenizer_STACK(self), token)) { + Py_XDECREF(token); + return -1; + } + + Py_XDECREF(token); + return 0; +} + +/* + Write a token to the beginning of the current token stack. +*/ +static int +Tokenizer_write_first(Tokenizer* self, PyObject* token) +{ + if (Tokenizer_push_textbuffer(self)) + return -1; + + if (PyList_Insert(Tokenizer_STACK(self), 0, token)) { + Py_XDECREF(token); + return -1; + } + + Py_XDECREF(token); + return 0; +} + +/* + Write text to the current textbuffer. +*/ +static int +Tokenizer_write_text(Tokenizer* self, PyObject* text) +{ + if (PyList_Append(Tokenizer_TEXTBUFFER(self), text)) { + Py_XDECREF(text); + return -1; + } + + Py_XDECREF(text); + return 0; +} + +/* + Write a series of tokens to the current stack at once. +*/ +static int +Tokenizer_write_all(Tokenizer* self, PyObject* tokenlist) +{ + if (Tokenizer_push_textbuffer(self)) + Py_XDECREF(tokenlist); + return -1; + + PyObject* stack = Tokenizer_STACK(self); + Py_ssize_t size = PySequence_Fast_GET_SIZE(stack); + + if (PyList_SetSlice(stack, size, size, tokenlist)) { + Py_XDECREF(tokenlist); + return -1; + } + + Py_XDECREF(tokenlist); + return 0; +} + +/* Read the value at a relative point in the wikicode. 
*/ -static Py_UNICODE* +static PyObject* Tokenizer_read(Tokenizer* self, Py_ssize_t delta) { Py_ssize_t index = self->head + delta; if (index >= self->length) { - return (Py_UNICODE*) OUT_OF_BOUNDS; + return EMPTY; } - PyObject* item = PySequence_Fast_GET_ITEM(self->text, index); - return PyUnicode_AS_UNICODE(item); + return PySequence_Fast_GET_ITEM(self->text, index); } /* @@ -266,7 +353,7 @@ Tokenizer_read(Tokenizer* self, Py_ssize_t delta) static PyObject* Tokenizer_parse(Tokenizer* self, int context) { - Py_UNICODE* this; + PyObject* this; Tokenizer_push(self, context); @@ -275,10 +362,9 @@ Tokenizer_parse(Tokenizer* self, int context) /* if (this not in MARKERS) { WRITE TEXT } */ - if (this == OUT_OF_BOUNDS) { + if (this == EMPTY) { return Tokenizer_pop(self); } - printf("%p %i %c\n", this, *this, *this); self->head++; } } @@ -390,6 +476,8 @@ init_tokenizer(void) Py_INCREF(&TokenizerType); PyModule_AddObject(module, "CTokenizer", (PyObject*) &TokenizerType); + EMPTY = PyUnicode_FromString(""); + PyObject* globals = PyEval_GetGlobals(); PyObject* locals = PyEval_GetLocals(); PyObject* fromlist = PyList_New(0); From 5267c30cf60b9c03cdf908112f8bffc390a87ac1 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 23 Sep 2012 03:57:04 -0400 Subject: [PATCH 005/180] Fix refcount handling; implement Tokenizer_write_text_then_stack. --- mwparserfromhell/parser/tokenizer.c | 52 +++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 99c9bfc..3f7e84e 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -247,10 +247,9 @@ Tokenizer_pop_keeping_context(Tokenizer* self) } /* - Fail the current tokenization route. - - Discards the current stack/context/textbuffer and "raises a BAD_ROUTE - exception", which is implemented using longjmp(). + Fail the current tokenization route. Discards the current + stack/context/textbuffer and "raises a BAD_ROUTE exception", which is + implemented using longjmp(). */ static void Tokenizer_fail_route(Tokenizer* self) @@ -268,12 +267,9 @@ Tokenizer_write(Tokenizer* self, PyObject* token) if (Tokenizer_push_textbuffer(self)) return -1; - if (PyList_Append(Tokenizer_STACK(self), token)) { - Py_XDECREF(token); + if (PyList_Append(Tokenizer_STACK(self), token)) return -1; - } - Py_XDECREF(token); return 0; } @@ -286,12 +282,9 @@ Tokenizer_write_first(Tokenizer* self, PyObject* token) if (Tokenizer_push_textbuffer(self)) return -1; - if (PyList_Insert(Tokenizer_STACK(self), 0, token)) { - Py_XDECREF(token); + if (PyList_Insert(Tokenizer_STACK(self), 0, token)) return -1; - } - Py_XDECREF(token); return 0; } @@ -301,12 +294,9 @@ Tokenizer_write_first(Tokenizer* self, PyObject* token) static int Tokenizer_write_text(Tokenizer* self, PyObject* text) { - if (PyList_Append(Tokenizer_TEXTBUFFER(self), text)) { - Py_XDECREF(text); + if (PyList_Append(Tokenizer_TEXTBUFFER(self), text)) return -1; - } - Py_XDECREF(text); return 0; } @@ -317,18 +307,40 @@ static int Tokenizer_write_all(Tokenizer* self, PyObject* tokenlist) { if (Tokenizer_push_textbuffer(self)) - Py_XDECREF(tokenlist); return -1; PyObject* stack = Tokenizer_STACK(self); Py_ssize_t size = PySequence_Fast_GET_SIZE(stack); - if (PyList_SetSlice(stack, size, size, tokenlist)) { - Py_XDECREF(tokenlist); + if (PyList_SetSlice(stack, size, size, tokenlist)) + return -1; + + return 0; +} + +/* + Pop the current stack, write text, and then write the stack. 
+*/ +static int +Tokenizer_write_text_then_stack(Tokenizer* self, PyObject* text) +{ + PyObject* stack = Tokenizer_pop(self); + if (Tokenizer_write_text(self, text)) { + Py_XDECREF(stack); return -1; } - Py_XDECREF(tokenlist); + if (stack) { + if (PySequence_Fast_GET_SIZE(stack) > 0) { + if (Tokenizer_write_all(self, stack)) { + Py_DECREF(stack); + return -1; + } + } + Py_DECREF(stack); + } + + self->head--; return 0; } From 8729d20f078df40c50a70ee7cbd392b534173a88 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 23 Sep 2012 17:40:46 -0400 Subject: [PATCH 006/180] Fill out Tokenizer_parse(); build a bunch of empty function definitions. --- mwparserfromhell/parser/tokenizer.c | 252 ++++++++++++++++++++++++++++++++++-- 1 file changed, 240 insertions(+), 12 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 3f7e84e..0d18473 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -26,8 +26,8 @@ SOFTWARE. #endif #include -#include "setjmp.h" -#include "structmember.h" +#include +#include static PyObject* EMPTY; @@ -35,7 +35,10 @@ static PyObject* EMPTY; static const Py_UNICODE* MARKERS[] = {PU"{", PU"}", PU"[", PU"]", PU"<", PU">", PU"|", PU"=", PU"&", PU"#", PU"*", PU";", PU":", PU"/", PU"-", PU"!", PU"\n", PU""}; -#undef PU +static const int NUM_MARKERS = 17; + +#define CONTEXT(name) PyInt_AsSsize_t((PyIntObject*) \ + PyObject_GetAttrString(contexts, name)) static jmp_buf exception_env; static const int BAD_ROUTE = 1; @@ -103,6 +106,7 @@ Tokenizer_init(Tokenizer* self, PyObject* args, PyObject* kwds) #define Tokenizer_STACK(self) PySequence_Fast_GET_ITEM(self->topstack, 0) #define Tokenizer_CONTEXT(self) PySequence_Fast_GET_ITEM(self->topstack, 1) +#define Tokenizer_CONTEXT_VAL(self) PyInt_AsSsize_t((PyIntObject*) Tokenizer_CONTEXT(self)) #define Tokenizer_TEXTBUFFER(self) PySequence_Fast_GET_ITEM(self->topstack, 2) static int @@ -125,11 +129,11 @@ Tokenizer_set_textbuffer(Tokenizer* self, PyObject* value) Add a new token stack, context, and textbuffer to the list. */ static int -Tokenizer_push(Tokenizer* self, int context) +Tokenizer_push(Tokenizer* self, Py_ssize_t context) { PyObject* top = PyList_New(3); PyList_SET_ITEM(top, 0, PyList_New(0)); - PyList_SET_ITEM(top, 1, PyInt_FromSsize_t(0)); + PyList_SET_ITEM(top, 1, PyInt_FromSsize_t(context)); PyList_SET_ITEM(top, 2, PyList_New(0)); Py_XDECREF(self->topstack); @@ -345,7 +349,7 @@ Tokenizer_write_text_then_stack(Tokenizer* self, PyObject* text) } /* - Read the value at a relative point in the wikicode. + Read the value at a relative point in the wikicode, forwards. */ static PyObject* Tokenizer_read(Tokenizer* self, Py_ssize_t delta) @@ -360,23 +364,247 @@ Tokenizer_read(Tokenizer* self, Py_ssize_t delta) } /* - Parse the wikicode string, using *context* for when to stop. + Read the value at a relative point in the wikicode, backwards. 
*/ static PyObject* -Tokenizer_parse(Tokenizer* self, int context) +Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta) +{ + if (delta > self->head) { + return EMPTY; + } + + Py_ssize_t index = self->head - delta; + return PySequence_Fast_GET_ITEM(self->text, index); +} + +static int +Tokenizer_parse_template_or_argument(Tokenizer* self) +{ + +} + +static int +Tokenizer_parse_template(Tokenizer* self) +{ + +} + +static int +Tokenizer_parse_argument(Tokenizer* self) +{ + +} + +static int +Tokenizer_verify_safe(Tokenizer* self) +{ + +} + +static int +Tokenizer_handle_template_param(Tokenizer* self) +{ + +} + +static int +Tokenizer_handle_template_param_value(Tokenizer* self) { - PyObject* this; + +} + +static PyObject* +Tokenizer_handle_template_end(Tokenizer* self) +{ + +} + +static int +Tokenizer_handle_argument_separator(Tokenizer* self) +{ + +} + +static PyObject* +Tokenizer_handle_argument_end(Tokenizer* self) +{ + +} + +static int +Tokenizer_parse_wikilink(Tokenizer* self) +{ + +} + +static int +Tokenizer_handle_wikilink_separator(Tokenizer* self) +{ + +} + +static PyObject* +Tokenizer_handle_wikilink_end(Tokenizer* self) +{ + +} + +static int +Tokenizer_parse_heading(Tokenizer* self) +{ + +} + +static PyObject* +Tokenizer_handle_heading_end(Tokenizer* self) +{ + +} + +static int +Tokenizer_really_parse_entity(Tokenizer* self) +{ + +} + +static int +Tokenizer_parse_entity(Tokenizer* self) +{ + +} + +static int +Tokenizer_parse_comment(Tokenizer* self) +{ + +} + + +/* + Parse the wikicode string, using context for when to stop. +*/ +static PyObject* +Tokenizer_parse(Tokenizer* self, Py_ssize_t context) +{ + Py_ssize_t fail_contexts = ( + CONTEXT("TEMPLATE") | CONTEXT("ARGUMENT") | CONTEXT("HEADING") | + CONTEXT("COMMENT")); + + PyObject *this, *next; + Py_UNICODE *this_data, *next_data, *next_next_data, *last_data; + Py_ssize_t this_context; + int is_marker, i; Tokenizer_push(self, context); while (1) { this = Tokenizer_read(self, 0); - /* if (this not in MARKERS) { - WRITE TEXT - } */ + this_data = PyUnicode_AS_UNICODE(this); + + is_marker = 0; + for (i = 0; i < NUM_MARKERS; i++) { + if (MARKERS[i] == this_data) { + is_marker = 1; + break; + } + } + + if (!is_marker) { + Tokenizer_write_text(self, this); + self->head++; + continue; + } + + this_context = Tokenizer_CONTEXT_VAL(self); + if (this == EMPTY) { + if (this_context & fail_contexts) { + Tokenizer_fail_route(self); + } return Tokenizer_pop(self); } + + next = Tokenizer_read(self, 1); + next_data = PyUnicode_AS_UNICODE(next); + + if (this_context & CONTEXT("COMMENT")) { + if (this_data == next_data && next_data == PU "-") { + if (PyUnicode_AS_UNICODE(Tokenizer_read(self, 2)) == PU ">") { + return Tokenizer_pop(self); + } + } + Tokenizer_write_text(self, this); + } + else if (this_data == next_data && next_data == PU "{") { + Tokenizer_parse_template_or_argument(self); + } + else if (this_data == PU "|" && this_context & CONTEXT("TEMPLATE")) { + Tokenizer_handle_template_param(self); + } + else if (this_data == PU "=" && this_context & CONTEXT("TEMPLATE_PARAM_KEY")) { + Tokenizer_handle_template_param_value(self); + } + else if (this_data == next_data && next_data == PU "}" && + this_context & CONTEXT("TEMPLATE")) { + Tokenizer_handle_template_end(self); + } + else if (this_data == PU "|" && this_context & CONTEXT("ARGUMENT_NAME")) { + Tokenizer_handle_argument_separator(self); + } + else if (this_data == next_data && next_data == PU "}" && + this_context & CONTEXT("ARGUMENT")) { + if 
(PyUnicode_AS_UNICODE(Tokenizer_read(self, 2)) == PU "}") { + return Tokenizer_handle_argument_end(self); + } + Tokenizer_write_text(self, this); + } + else if (this_data == next_data && next_data == PU "[") { + if (!(this_context & CONTEXT("WIKILINK_TITLE"))) { + Tokenizer_parse_wikilink(self); + } + else { + Tokenizer_write_text(self, this); + } + } + else if (this_data == PU "|" && this_context & CONTEXT("WIKILINK_TITLE")) { + Tokenizer_handle_wikilink_separator(self); + } + else if (this_data == next_data && next_data == PU "]" && + this_context & CONTEXT("WIKILINK")) { + return Tokenizer_handle_wikilink_end(self); + } + else if (this_data == PU "=" && !(self->global & CONTEXT("GL_HEADING"))) { + last_data = PyUnicode_AS_UNICODE(Tokenizer_read_backwards(self, 1)); + if (last_data == PU "\n" || last_data == PU "") { + Tokenizer_parse_heading(self); + } + else { + Tokenizer_write_text(self, this); + } + } + else if (this_data == PU "=" && this_context & CONTEXT("HEADING")) { + return Tokenizer_handle_heading_end(self); + } + else if (this_data == PU "\n" && this_context & CONTEXT("HEADING")) { + Tokenizer_fail_route(self); + } + else if (this_data == PU "&") { + Tokenizer_parse_entity(self); + } + else if (this_data == PU "<" && next_data == PU "!") { + next_next_data = PyUnicode_AS_UNICODE(Tokenizer_read(self, 2)); + if (next_next_data == PyUnicode_AS_UNICODE(Tokenizer_read(self, 3)) && + next_next_data == PU "-") { + Tokenizer_parse_comment(self); + } + else { + Tokenizer_write_text(self, this); + } + } + else { + Tokenizer_write_text(self, this); + } + self->head++; } } From 1ecb0e0d4485e71f9d49555d114df56ac9f0acff Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 23 Sep 2012 17:48:57 -0400 Subject: [PATCH 007/180] Fix Tokenizer_verify_safe()'s prototype; add documentation. --- mwparserfromhell/parser/tokenizer.c | 57 +++++++++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 2 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 0d18473..ad013cb 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -377,109 +377,162 @@ Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta) return PySequence_Fast_GET_ITEM(self->text, index); } +/* + Parse a template or argument at the head of the wikicode string. +*/ static int Tokenizer_parse_template_or_argument(Tokenizer* self) { } +/* + Parse a template at the head of the wikicode string. +*/ static int Tokenizer_parse_template(Tokenizer* self) { } +/* + Parse an argument at the head of the wikicode string. +*/ static int Tokenizer_parse_argument(Tokenizer* self) { } +/* + Verify that there are no unsafe characters in the current stack. The route + will be failed if the name contains any element of unsafes in it (not + merely at the beginning or end). This is used when parsing a template name + or parameter key, which cannot contain newlines. +*/ static int -Tokenizer_verify_safe(Tokenizer* self) +Tokenizer_verify_safe(Tokenizer* self, Py_UNICODE* unsafes[]) { } +/* + Handle a template parameter at the head of the string. +*/ static int Tokenizer_handle_template_param(Tokenizer* self) { } +/* + Handle a template parameter's value at the head of the string. +*/ static int Tokenizer_handle_template_param_value(Tokenizer* self) { } +/* + Handle the end of a template at the head of the string. +*/ static PyObject* Tokenizer_handle_template_end(Tokenizer* self) { } +/* + Handle the separator between an argument's name and default. 
+*/ static int Tokenizer_handle_argument_separator(Tokenizer* self) { } +/* + Handle the end of an argument at the head of the string. +*/ static PyObject* Tokenizer_handle_argument_end(Tokenizer* self) { } +/* + Parse an internal wikilink at the head of the wikicode string. +*/ static int Tokenizer_parse_wikilink(Tokenizer* self) { } +/* + Handle the separator between a wikilink's title and its text. +*/ static int Tokenizer_handle_wikilink_separator(Tokenizer* self) { } +/* + Handle the end of a wikilink at the head of the string. +*/ static PyObject* Tokenizer_handle_wikilink_end(Tokenizer* self) { } +/* + Parse a section heading at the head of the wikicode string. +*/ static int Tokenizer_parse_heading(Tokenizer* self) { } +/* + Handle the end of a section heading at the head of the string. +*/ static PyObject* Tokenizer_handle_heading_end(Tokenizer* self) { } +/* + Actually parse an HTML entity and ensure that it is valid. +*/ static int Tokenizer_really_parse_entity(Tokenizer* self) { } +/* + Parse an HTML entity at the head of the wikicode string. +*/ static int Tokenizer_parse_entity(Tokenizer* self) { } +/* + Parse an HTML comment at the head of the wikicode string. +*/ static int Tokenizer_parse_comment(Tokenizer* self) { } - /* Parse the wikicode string, using context for when to stop. */ From 7fc45783b78772b5b689f3b724481997e23cd4ca Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 23 Sep 2012 18:30:04 -0400 Subject: [PATCH 008/180] Add a header file; improve context handling. --- mwparserfromhell/parser/tokenizer.c | 131 +++--------------------- mwparserfromhell/parser/tokenizer.h | 199 ++++++++++++++++++++++++++++++++++++ 2 files changed, 213 insertions(+), 117 deletions(-) create mode 100644 mwparserfromhell/parser/tokenizer.h diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index ad013cb..41713e2 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -21,45 +21,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#ifndef PY_SSIZE_T_CLEAN -#define PY_SSIZE_T_CLEAN -#endif - -#include -#include -#include - -static PyObject* EMPTY; - -#define PU (Py_UNICODE*) -static const Py_UNICODE* MARKERS[] = {PU"{", PU"}", PU"[", PU"]", PU"<", PU">", - PU"|", PU"=", PU"&", PU"#", PU"*", PU";", - PU":", PU"/", PU"-", PU"!", PU"\n", PU""}; -static const int NUM_MARKERS = 17; - -#define CONTEXT(name) PyInt_AsSsize_t((PyIntObject*) \ - PyObject_GetAttrString(contexts, name)) - -static jmp_buf exception_env; -static const int BAD_ROUTE = 1; - -static PyObject* contexts; -static PyObject* tokens; - -static PyMethodDef -module_methods[] = { - {NULL} -}; - -typedef struct { - PyObject_HEAD - PyObject* text; /* text to tokenize */ - PyObject* stacks; /* token stacks */ - PyObject* topstack; /* topmost stack */ - Py_ssize_t head; /* current position in text */ - Py_ssize_t length; /* length of text */ - Py_ssize_t global; /* global context */ -} Tokenizer; +#include "tokenizer.h" static PyObject* Tokenizer_new(PyTypeObject* type, PyObject* args, PyObject* kwds) @@ -104,11 +66,6 @@ Tokenizer_init(Tokenizer* self, PyObject* args, PyObject* kwds) return 0; } -#define Tokenizer_STACK(self) PySequence_Fast_GET_ITEM(self->topstack, 0) -#define Tokenizer_CONTEXT(self) PySequence_Fast_GET_ITEM(self->topstack, 1) -#define Tokenizer_CONTEXT_VAL(self) PyInt_AsSsize_t((PyIntObject*) Tokenizer_CONTEXT(self)) -#define Tokenizer_TEXTBUFFER(self) PySequence_Fast_GET_ITEM(self->topstack, 2) - static int Tokenizer_set_context(Tokenizer* self, Py_ssize_t value) { @@ -539,9 +496,7 @@ Tokenizer_parse_comment(Tokenizer* self) static PyObject* Tokenizer_parse(Tokenizer* self, Py_ssize_t context) { - Py_ssize_t fail_contexts = ( - CONTEXT("TEMPLATE") | CONTEXT("ARGUMENT") | CONTEXT("HEADING") | - CONTEXT("COMMENT")); + Py_ssize_t fail_contexts = LC_TEMPLATE | LC_ARGUMENT | LC_HEADING | LC_COMMENT; PyObject *this, *next; Py_UNICODE *this_data, *next_data, *next_next_data, *last_data; @@ -580,7 +535,7 @@ Tokenizer_parse(Tokenizer* self, Py_ssize_t context) next = Tokenizer_read(self, 1); next_data = PyUnicode_AS_UNICODE(next); - if (this_context & CONTEXT("COMMENT")) { + if (this_context & LC_COMMENT) { if (this_data == next_data && next_data == PU "-") { if (PyUnicode_AS_UNICODE(Tokenizer_read(self, 2)) == PU ">") { return Tokenizer_pop(self); @@ -591,42 +546,40 @@ Tokenizer_parse(Tokenizer* self, Py_ssize_t context) else if (this_data == next_data && next_data == PU "{") { Tokenizer_parse_template_or_argument(self); } - else if (this_data == PU "|" && this_context & CONTEXT("TEMPLATE")) { + else if (this_data == PU "|" && this_context & LC_TEMPLATE) { Tokenizer_handle_template_param(self); } - else if (this_data == PU "=" && this_context & CONTEXT("TEMPLATE_PARAM_KEY")) { + else if (this_data == PU "=" && this_context & LC_TEMPLATE_PARAM_KEY) { Tokenizer_handle_template_param_value(self); } - else if (this_data == next_data && next_data == PU "}" && - this_context & CONTEXT("TEMPLATE")) { + else if (this_data == next_data && next_data == PU "}" && this_context & LC_TEMPLATE) { Tokenizer_handle_template_end(self); } - else if (this_data == PU "|" && this_context & CONTEXT("ARGUMENT_NAME")) { + else if (this_data == PU "|" && this_context & LC_ARGUMENT_NAME) { Tokenizer_handle_argument_separator(self); } - else if (this_data == next_data && next_data == PU "}" && - this_context & CONTEXT("ARGUMENT")) { + else if (this_data == next_data && next_data == PU "}" && this_context & LC_ARGUMENT) { if (PyUnicode_AS_UNICODE(Tokenizer_read(self, 2)) == 
PU "}") { return Tokenizer_handle_argument_end(self); } Tokenizer_write_text(self, this); } else if (this_data == next_data && next_data == PU "[") { - if (!(this_context & CONTEXT("WIKILINK_TITLE"))) { + if (!(this_context & LC_WIKILINK_TITLE)) { Tokenizer_parse_wikilink(self); } else { Tokenizer_write_text(self, this); } } - else if (this_data == PU "|" && this_context & CONTEXT("WIKILINK_TITLE")) { + else if (this_data == PU "|" && this_context & LC_WIKILINK_TITLE) { Tokenizer_handle_wikilink_separator(self); } else if (this_data == next_data && next_data == PU "]" && - this_context & CONTEXT("WIKILINK")) { + this_context & LC_WIKILINK) { return Tokenizer_handle_wikilink_end(self); } - else if (this_data == PU "=" && !(self->global & CONTEXT("GL_HEADING"))) { + else if (this_data == PU "=" && !(self->global & GL_HEADING)) { last_data = PyUnicode_AS_UNICODE(Tokenizer_read_backwards(self, 1)); if (last_data == PU "\n" || last_data == PU "") { Tokenizer_parse_heading(self); @@ -635,10 +588,10 @@ Tokenizer_parse(Tokenizer* self, Py_ssize_t context) Tokenizer_write_text(self, this); } } - else if (this_data == PU "=" && this_context & CONTEXT("HEADING")) { + else if (this_data == PU "=" && this_context & LC_HEADING) { return Tokenizer_handle_heading_end(self); } - else if (this_data == PU "\n" && this_context & CONTEXT("HEADING")) { + else if (this_data == PU "\n" && this_context & LC_HEADING) { Tokenizer_fail_route(self); } else if (this_data == PU "&") { @@ -700,61 +653,6 @@ Tokenizer_tokenize(Tokenizer* self, PyObject *args) return Tokenizer_parse(self, 0); } -static PyMethodDef -Tokenizer_methods[] = { - {"tokenize", (PyCFunction) Tokenizer_tokenize, METH_VARARGS, - "Build a list of tokens from a string of wikicode and return it."}, - {NULL} -}; - -static PyMemberDef -Tokenizer_members[] = { - {NULL} -}; - -static PyTypeObject -TokenizerType = { - PyObject_HEAD_INIT(NULL) - 0, /* ob_size */ - "_tokenizer.CTokenizer", /* tp_name */ - sizeof(Tokenizer), /* tp_basicsize */ - 0, /* tp_itemsize */ - (destructor) Tokenizer_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_compare */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - 0, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT, /* tp_flags */ - "Creates a list of tokens from a string of wikicode.", /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - Tokenizer_methods, /* tp_methods */ - Tokenizer_members, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - (initproc) Tokenizer_init, /* tp_init */ - 0, /* tp_alloc */ - Tokenizer_new, /* tp_new */ -}; - PyMODINIT_FUNC init_tokenizer(void) { @@ -775,7 +673,6 @@ init_tokenizer(void) PyObject* locals = PyEval_GetLocals(); PyObject* fromlist = PyList_New(0); - contexts = PyImport_ImportModuleLevel("contexts", globals, locals, fromlist, 1); tokens = PyImport_ImportModuleLevel("tokens", globals, locals, fromlist, 1); Py_DECREF(fromlist); } diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h new file mode 100644 index 0000000..c504dd8 --- /dev/null +++ b/mwparserfromhell/parser/tokenizer.h @@ -0,0 +1,199 @@ +/* +Tokenizer Header File for MWParserFromHell +Copyright (C) 2012 Ben 
Kurtovic + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +#ifndef PY_SSIZE_T_CLEAN +#define PY_SSIZE_T_CLEAN +#endif + +#include +#include +#include + +#define PU (Py_UNICODE*) + +static const Py_UNICODE* MARKERS[] = { + PU "{", PU "}", PU "[", PU "]", PU "<", PU ">", PU "|", PU "=", PU "&", + PU "#", PU "*", PU ";", PU ":", PU "/", PU "-", PU "!", PU "\n", PU ""}; +static const int NUM_MARKERS = 17; + +static jmp_buf exception_env; +static const int BAD_ROUTE = 1; + +static PyObject* EMPTY; +static PyObject* tokens; + + +/* Local contexts: */ + +static const Py_ssize_t LC_TEMPLATE = 0x0007; +static const Py_ssize_t LC_TEMPLATE_NAME = 0x0001; +static const Py_ssize_t LC_TEMPLATE_PARAM_KEY = 0x0002; +static const Py_ssize_t LC_TEMPLATE_PARAM_VALUE = 0x0004; + +static const Py_ssize_t LC_ARGUMENT = 0x0018; +static const Py_ssize_t LC_ARGUMENT_NAME = 0x0008; +static const Py_ssize_t LC_ARGUMENT_DEFAULT = 0x0010; + +static const Py_ssize_t LC_WIKILINK = 0x0060; +static const Py_ssize_t LC_WIKILINK_TITLE = 0x0020; +static const Py_ssize_t LC_WIKILINK_TEXT = 0x0040; + +static const Py_ssize_t LC_HEADING = 0x1f80; +static const Py_ssize_t LC_HEADING_LEVEL_1 = 0x0080; +static const Py_ssize_t LC_HEADING_LEVEL_2 = 0x0100; +static const Py_ssize_t LC_HEADING_LEVEL_3 = 0x0200; +static const Py_ssize_t LC_HEADING_LEVEL_4 = 0x0400; +static const Py_ssize_t LC_HEADING_LEVEL_5 = 0x0800; +static const Py_ssize_t LC_HEADING_LEVEL_6 = 0x1000; + +static const Py_ssize_t LC_COMMENT = 0x2000; + + +/* Global contexts: */ + +static const Py_ssize_t GL_HEADING = 0x1; + + +/* Tokenizer object definition: */ + +typedef struct { + PyObject_HEAD + PyObject* text; /* text to tokenize */ + PyObject* stacks; /* token stacks */ + PyObject* topstack; /* topmost stack */ + Py_ssize_t head; /* current position in text */ + Py_ssize_t length; /* length of text */ + Py_ssize_t global; /* global context */ +} Tokenizer; + + +/* Some macros for accessing Tokenizer data: */ + +#define Tokenizer_STACK(self) PySequence_Fast_GET_ITEM(self->topstack, 0) +#define Tokenizer_CONTEXT(self) PySequence_Fast_GET_ITEM(self->topstack, 1) +#define Tokenizer_CONTEXT_VAL(self) PyInt_AsSsize_t(Tokenizer_CONTEXT(self)) +#define Tokenizer_TEXTBUFFER(self) PySequence_Fast_GET_ITEM(self->topstack, 2) + + +/* Tokenizer function prototypes: */ + +static PyObject* Tokenizer_new(PyTypeObject* type, PyObject* args, PyObject* kwds); +static void Tokenizer_dealloc(Tokenizer* self); +static int Tokenizer_init(Tokenizer* self, PyObject* args, PyObject* kwds); +static int 
Tokenizer_set_context(Tokenizer* self, Py_ssize_t value); +static int Tokenizer_set_textbuffer(Tokenizer* self, PyObject* value); +static int Tokenizer_push(Tokenizer* self, Py_ssize_t context); +static int Tokenizer_push_textbuffer(Tokenizer* self); +static int Tokenizer_delete_top_of_stack(Tokenizer* self); +static PyObject* Tokenizer_pop(Tokenizer* self); +static PyObject* Tokenizer_pop_keeping_context(Tokenizer* self); +static void Tokenizer_fail_route(Tokenizer* self); +static int Tokenizer_write(Tokenizer* self, PyObject* token); +static int Tokenizer_write_first(Tokenizer* self, PyObject* token); +static int Tokenizer_write_text(Tokenizer* self, PyObject* text); +static int Tokenizer_write_all(Tokenizer* self, PyObject* tokenlist); +static int Tokenizer_write_text_then_stack(Tokenizer* self, PyObject* text); +static PyObject* Tokenizer_read(Tokenizer* self, Py_ssize_t delta); +static PyObject* Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta); +static int Tokenizer_parse_template_or_argument(Tokenizer* self); +static int Tokenizer_parse_template(Tokenizer* self); +static int Tokenizer_parse_argument(Tokenizer* self); +static int Tokenizer_verify_safe(Tokenizer* self, Py_UNICODE* unsafes[]); +static int Tokenizer_handle_template_param(Tokenizer* self); +static int Tokenizer_handle_template_param_value(Tokenizer* self); +static PyObject* Tokenizer_handle_template_end(Tokenizer* self); +static int Tokenizer_handle_argument_separator(Tokenizer* self); +static PyObject* Tokenizer_handle_argument_end(Tokenizer* self); +static int Tokenizer_parse_wikilink(Tokenizer* self); +static int Tokenizer_handle_wikilink_separator(Tokenizer* self); +static PyObject* Tokenizer_handle_wikilink_end(Tokenizer* self); +static int Tokenizer_parse_heading(Tokenizer* self); +static PyObject* Tokenizer_handle_heading_end(Tokenizer* self); +static int Tokenizer_really_parse_entity(Tokenizer* self); +static int Tokenizer_parse_entity(Tokenizer* self); +static int Tokenizer_parse_comment(Tokenizer* self); +static PyObject* Tokenizer_parse(Tokenizer* self, Py_ssize_t context); +static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject *args); + + +/* More structs for creating the Tokenizer type: */ + +static PyMethodDef +Tokenizer_methods[] = { + {"tokenize", (PyCFunction) Tokenizer_tokenize, METH_VARARGS, + "Build a list of tokens from a string of wikicode and return it."}, + {NULL} +}; + +static PyMemberDef +Tokenizer_members[] = { + {NULL} +}; + +static PyMethodDef +module_methods[] = { + {NULL} +}; + +static PyTypeObject +TokenizerType = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + "_tokenizer.CTokenizer", /* tp_name */ + sizeof(Tokenizer), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor) Tokenizer_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + "Creates a list of tokens from a string of wikicode.", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + Tokenizer_methods, /* tp_methods */ + Tokenizer_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc) 
Tokenizer_init, /* tp_init */ + 0, /* tp_alloc */ + Tokenizer_new, /* tp_new */ +}; From 6edc24037eff257e82cfe3d86d3d2b253d2b5fa5 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 23 Sep 2012 19:14:23 -0400 Subject: [PATCH 009/180] Implement Tokenizer_parse_template_or_argument(). --- mwparserfromhell/parser/tokenizer.c | 118 ++++++++++++++++++++++++++++------- mwparserfromhell/parser/tokenizer.h | 3 +- mwparserfromhell/parser/tokenizer.py | 2 +- 3 files changed, 98 insertions(+), 25 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 41713e2..4877773 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -313,9 +313,8 @@ Tokenizer_read(Tokenizer* self, Py_ssize_t delta) { Py_ssize_t index = self->head + delta; - if (index >= self->length) { + if (index >= self->length) return EMPTY; - } return PySequence_Fast_GET_ITEM(self->text, index); } @@ -326,9 +325,8 @@ Tokenizer_read(Tokenizer* self, Py_ssize_t delta) static PyObject* Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta) { - if (delta > self->head) { + if (delta > self->head) return EMPTY; - } Py_ssize_t index = self->head - delta; return PySequence_Fast_GET_ITEM(self->text, index); @@ -340,7 +338,84 @@ Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta) static int Tokenizer_parse_template_or_argument(Tokenizer* self) { + self->head += 2; + unsigned int braces = 2, i; + + while (Tokenizer_READ(self, 0) == PU "{") { + self->head++; + braces++; + } + Tokenizer_push(self, 0); + + while (braces) { + if (braces == 1) { + PyObject* text = PyUnicode_FromString("{"); + + if (Tokenizer_write_text_then_stack(self, text)) { + Py_XDECREF(text); + return -1; + } + + Py_XDECREF(text); + return 0; + } + + if (braces == 2) { + if (setjmp(exception_env) == BAD_ROUTE) { + PyObject* text = PyUnicode_FromString("{{"); + + if (Tokenizer_write_text_then_stack(self, text)) { + Py_XDECREF(text); + return -1; + } + + Py_XDECREF(text); + return 0; + } else { + Tokenizer_parse_template(self); + } + break; + } + + if (setjmp(exception_env) == BAD_ROUTE) { + if (setjmp(exception_env) == BAD_ROUTE) { + char bracestr[braces]; + for (i = 0; i < braces; i++) { + bracestr[i] = *"{"; + } + PyObject* text = PyUnicode_FromString(bracestr); + + if (Tokenizer_write_text_then_stack(self, text)) { + Py_XDECREF(text); + return -1; + } + + Py_XDECREF(text); + return 0; + } + else { + Tokenizer_parse_template(self); + braces -= 2; + } + } + else { + Tokenizer_parse_argument(self); + braces -= 3; + } + + if (braces) { + self->head++; + } + } + PyObject* tokenlist = Tokenizer_pop(self); + if (Tokenizer_write_all(self, tokenlist)) { + Py_DECREF(tokenlist); + return -1; + } + + Py_DECREF(tokenlist); + return 0; } /* @@ -498,8 +573,8 @@ Tokenizer_parse(Tokenizer* self, Py_ssize_t context) { Py_ssize_t fail_contexts = LC_TEMPLATE | LC_ARGUMENT | LC_HEADING | LC_COMMENT; - PyObject *this, *next; - Py_UNICODE *this_data, *next_data, *next_next_data, *last_data; + PyObject *this; + Py_UNICODE *this_data, *next, *next_next, *last; Py_ssize_t this_context; int is_marker, i; @@ -532,18 +607,17 @@ Tokenizer_parse(Tokenizer* self, Py_ssize_t context) return Tokenizer_pop(self); } - next = Tokenizer_read(self, 1); - next_data = PyUnicode_AS_UNICODE(next); + next = Tokenizer_READ(self, 1); if (this_context & LC_COMMENT) { - if (this_data == next_data && next_data == PU "-") { - if (PyUnicode_AS_UNICODE(Tokenizer_read(self, 2)) == PU ">") { + if (this_data == next && next == PU 
"-") { + if (Tokenizer_READ(self, 2) == PU ">") { return Tokenizer_pop(self); } } Tokenizer_write_text(self, this); } - else if (this_data == next_data && next_data == PU "{") { + else if (this_data == next && next == PU "{") { Tokenizer_parse_template_or_argument(self); } else if (this_data == PU "|" && this_context & LC_TEMPLATE) { @@ -552,19 +626,19 @@ Tokenizer_parse(Tokenizer* self, Py_ssize_t context) else if (this_data == PU "=" && this_context & LC_TEMPLATE_PARAM_KEY) { Tokenizer_handle_template_param_value(self); } - else if (this_data == next_data && next_data == PU "}" && this_context & LC_TEMPLATE) { + else if (this_data == next && next == PU "}" && this_context & LC_TEMPLATE) { Tokenizer_handle_template_end(self); } else if (this_data == PU "|" && this_context & LC_ARGUMENT_NAME) { Tokenizer_handle_argument_separator(self); } - else if (this_data == next_data && next_data == PU "}" && this_context & LC_ARGUMENT) { - if (PyUnicode_AS_UNICODE(Tokenizer_read(self, 2)) == PU "}") { + else if (this_data == next && next == PU "}" && this_context & LC_ARGUMENT) { + if (Tokenizer_READ(self, 2) == PU "}") { return Tokenizer_handle_argument_end(self); } Tokenizer_write_text(self, this); } - else if (this_data == next_data && next_data == PU "[") { + else if (this_data == next && next == PU "[") { if (!(this_context & LC_WIKILINK_TITLE)) { Tokenizer_parse_wikilink(self); } @@ -575,13 +649,12 @@ Tokenizer_parse(Tokenizer* self, Py_ssize_t context) else if (this_data == PU "|" && this_context & LC_WIKILINK_TITLE) { Tokenizer_handle_wikilink_separator(self); } - else if (this_data == next_data && next_data == PU "]" && - this_context & LC_WIKILINK) { + else if (this_data == next && next == PU "]" && this_context & LC_WIKILINK) { return Tokenizer_handle_wikilink_end(self); } else if (this_data == PU "=" && !(self->global & GL_HEADING)) { - last_data = PyUnicode_AS_UNICODE(Tokenizer_read_backwards(self, 1)); - if (last_data == PU "\n" || last_data == PU "") { + last = PyUnicode_AS_UNICODE(Tokenizer_read_backwards(self, 1)); + if (last == PU "\n" || last == PU "") { Tokenizer_parse_heading(self); } else { @@ -597,10 +670,9 @@ Tokenizer_parse(Tokenizer* self, Py_ssize_t context) else if (this_data == PU "&") { Tokenizer_parse_entity(self); } - else if (this_data == PU "<" && next_data == PU "!") { - next_next_data = PyUnicode_AS_UNICODE(Tokenizer_read(self, 2)); - if (next_next_data == PyUnicode_AS_UNICODE(Tokenizer_read(self, 3)) && - next_next_data == PU "-") { + else if (this_data == PU "<" && next == PU "!") { + next_next = Tokenizer_READ(self, 2); + if (next_next == Tokenizer_READ(self, 3) && next_next == PU "-") { Tokenizer_parse_comment(self); } else { diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index c504dd8..3f7dfdf 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -87,12 +87,13 @@ typedef struct { } Tokenizer; -/* Some macros for accessing Tokenizer data: */ +/* Macros for accessing Tokenizer data: */ #define Tokenizer_STACK(self) PySequence_Fast_GET_ITEM(self->topstack, 0) #define Tokenizer_CONTEXT(self) PySequence_Fast_GET_ITEM(self->topstack, 1) #define Tokenizer_CONTEXT_VAL(self) PyInt_AsSsize_t(Tokenizer_CONTEXT(self)) #define Tokenizer_TEXTBUFFER(self) PySequence_Fast_GET_ITEM(self->topstack, 2) +#define Tokenizer_READ(self, num) PyUnicode_AS_UNICODE(Tokenizer_read(self, num)) /* Tokenizer function prototypes: */ diff --git a/mwparserfromhell/parser/tokenizer.py 
b/mwparserfromhell/parser/tokenizer.py index ca645b0..364455d 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -162,8 +162,8 @@ class Tokenizer(object): self._head += 2 braces = 2 while self._read() == "{": - braces += 1 self._head += 1 + braces += 1 self._push() while braces: From 0d720a7ef13e7e377dd0d47c88d1e68c717e8b2c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 23 Sep 2012 20:35:49 -0400 Subject: [PATCH 010/180] Implement Tokenizer_parse_template(); NOARGS and NOKWARGS. --- mwparserfromhell/parser/tokenizer.c | 50 +++++++++++++++++++++++++++---------- mwparserfromhell/parser/tokenizer.h | 2 ++ 2 files changed, 39 insertions(+), 13 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 4877773..d9b953b 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -111,29 +111,22 @@ Tokenizer_push_textbuffer(Tokenizer* self) PyObject* text = PyUnicode_Join(EMPTY, Tokenizer_TEXTBUFFER(self)); if (!text) return -1; - PyObject* klass = PyObject_GetAttrString(tokens, "Text"); - if (!klass) return -1; - PyObject* args = PyTuple_New(0); - if (!args) return -1; + PyObject* class = PyObject_GetAttrString(tokens, "Text"); + if (!class) return -1; PyObject* kwargs = PyDict_New(); if (!kwargs) return -1; PyDict_SetItemString(kwargs, "text", text); Py_DECREF(text); - PyObject* token = PyInstance_New(klass, args, kwargs); + PyObject* token = PyInstance_New(class, NOARGS, kwargs); + Py_DECREF(class); + Py_DECREF(kwargs); if (!token) { - Py_DECREF(klass); - Py_DECREF(args); - Py_DECREF(kwargs); return -1; } - Py_DECREF(klass); - Py_DECREF(args); - Py_DECREF(kwargs); - if (PyList_Append(Tokenizer_STACK(self), token)) { - Py_XDECREF(token); + Py_DECREF(token); return -1; } @@ -424,7 +417,36 @@ Tokenizer_parse_template_or_argument(Tokenizer* self) static int Tokenizer_parse_template(Tokenizer* self) { + Py_ssize_t reset = self->head; + if (setjmp(exception_env) == BAD_ROUTE) { + self->head = reset; + longjmp(exception_env, BAD_ROUTE); + } + else { + PyObject* template = Tokenizer_parse(self, LC_TEMPLATE_NAME); + if (!template) return -1; + + PyObject* class = PyObject_GetAttrString(tokens, "TemplateOpen"); + if (!class) return -1; + PyObject* token = PyInstance_New(class, NOARGS, NOKWARGS); + Py_DECREF(class); + if (!token) return -1; + Tokenizer_write_first(self, token); + Py_DECREF(token); + + Tokenizer_write_all(self, template); + Py_DECREF(template); + + class = PyObject_GetAttrString(tokens, "TemplateClose"); + if (!class) return -1; + token = PyInstance_New(class, NOARGS, NOKWARGS); + Py_DECREF(class); + if (!token) return -1; + + Tokenizer_write(self, token); + Py_DECREF(token); + } } /* @@ -740,6 +762,8 @@ init_tokenizer(void) PyModule_AddObject(module, "CTokenizer", (PyObject*) &TokenizerType); EMPTY = PyUnicode_FromString(""); + NOARGS = PyTuple_New(0); + NOKWARGS = PyDict_New(); PyObject* globals = PyEval_GetGlobals(); PyObject* locals = PyEval_GetLocals(); diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index 3f7dfdf..d6c97c8 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -40,6 +40,8 @@ static jmp_buf exception_env; static const int BAD_ROUTE = 1; static PyObject* EMPTY; +static PyObject* NOARGS; +static PyObject* NOKWARGS; static PyObject* tokens; From 849016f73488eb4eee51fb8c0b16f49231e2dc3b Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 23 Sep 2012 21:27:08 
-0400 Subject: [PATCH 011/180] Implement Tokenizer_verify_safe() and some others. --- mwparserfromhell/parser/tokenizer.c | 164 +++++++++++++++++++++++++++++++++--- mwparserfromhell/parser/tokenizer.h | 2 +- 2 files changed, 155 insertions(+), 11 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index d9b953b..3d3b95f 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -121,9 +121,7 @@ Tokenizer_push_textbuffer(Tokenizer* self) PyObject* token = PyInstance_New(class, NOARGS, kwargs); Py_DECREF(class); Py_DECREF(kwargs); - if (!token) { - return -1; - } + if (!token) return -1; if (PyList_Append(Tokenizer_STACK(self), token)) { Py_DECREF(token); @@ -417,25 +415,34 @@ Tokenizer_parse_template_or_argument(Tokenizer* self) static int Tokenizer_parse_template(Tokenizer* self) { + PyObject *template, *class, *token; Py_ssize_t reset = self->head; + if (setjmp(exception_env) == BAD_ROUTE) { self->head = reset; longjmp(exception_env, BAD_ROUTE); } + else { - PyObject* template = Tokenizer_parse(self, LC_TEMPLATE_NAME); + template = Tokenizer_parse(self, LC_TEMPLATE_NAME); if (!template) return -1; - PyObject* class = PyObject_GetAttrString(tokens, "TemplateOpen"); + class = PyObject_GetAttrString(tokens, "TemplateOpen"); if (!class) return -1; - PyObject* token = PyInstance_New(class, NOARGS, NOKWARGS); + token = PyInstance_New(class, NOARGS, NOKWARGS); Py_DECREF(class); if (!token) return -1; - Tokenizer_write_first(self, token); + if (Tokenizer_write_first(self, token)) { + Py_DECREF(token); + return -1; + } Py_DECREF(token); - Tokenizer_write_all(self, template); + if (Tokenizer_write_all(self, template)) { + Py_DECREF(template); + return -1; + } Py_DECREF(template); class = PyObject_GetAttrString(tokens, "TemplateClose"); @@ -444,9 +451,14 @@ Tokenizer_parse_template(Tokenizer* self) Py_DECREF(class); if (!token) return -1; - Tokenizer_write(self, token); + if (Tokenizer_write(self, token)) { + Py_DECREF(token); + return -1; + } Py_DECREF(token); } + + return 0; } /* @@ -455,7 +467,50 @@ Tokenizer_parse_template(Tokenizer* self) static int Tokenizer_parse_argument(Tokenizer* self) { + PyObject *argument, *class, *token; + Py_ssize_t reset = self->head; + + if (setjmp(exception_env) == BAD_ROUTE) { + self->head = reset; + longjmp(exception_env, BAD_ROUTE); + } + + else { + argument = Tokenizer_parse(self, LC_ARGUMENT_NAME); + if (!argument) return -1; + + class = PyObject_GetAttrString(tokens, "ArgumentOpen"); + if (!class) return -1; + token = PyInstance_New(class, NOARGS, NOKWARGS); + Py_DECREF(class); + if (!token) return -1; + + if (Tokenizer_write_first(self, token)) { + Py_DECREF(token); + return -1; + } + Py_DECREF(token); + + if (Tokenizer_write_all(self, argument)) { + Py_DECREF(argument); + return -1; + } + Py_DECREF(argument); + + class = PyObject_GetAttrString(tokens, "ArgumentClose"); + if (!class) return -1; + token = PyInstance_New(class, NOARGS, NOKWARGS); + Py_DECREF(class); + if (!token) return -1; + if (Tokenizer_write(self, token)) { + Py_DECREF(token); + return -1; + } + Py_DECREF(token); + } + + return 0; } /* @@ -465,9 +520,98 @@ Tokenizer_parse_argument(Tokenizer* self) or parameter key, which cannot contain newlines. 
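 The unsafes array is expected to be NULL-terminated: each entry is matched
 as a substring against the stripped text of the stack's Text tokens, and
 any match fails the route.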
*/ static int -Tokenizer_verify_safe(Tokenizer* self, Py_UNICODE* unsafes[]) +Tokenizer_verify_safe(Tokenizer* self, const char* unsafes[]) { + if (Tokenizer_push_textbuffer(self)) + return -1; + PyObject* stack = Tokenizer_STACK(self); + if (stack) { + PyObject* textlist = PyList_New(0); + if (!textlist) return -1; + + PyObject* class = PyObject_GetAttrString(tokens, "Text"); + if (!class) { + Py_DECREF(textlist); + return -1; + } + + int i; + Py_ssize_t length = PySequence_Fast_GET_SIZE(stack); + PyObject *token, *textdata; + + for (i = 0; i < length; i++) { + token = PySequence_Fast_GET_ITEM(stack, i); + switch (PyObject_IsInstance(token, class)) { + case -1: + Py_DECREF(textlist); + Py_DECREF(class); + return -1; + case 0: + break; + case 1: + textdata = PyObject_GetAttrString(token, "text"); + if (!textdata) { + Py_DECREF(textlist); + Py_DECREF(class); + return -1; + } + if (PyList_Append(textlist, textdata)) { + Py_DECREF(textlist); + Py_DECREF(class); + Py_DECREF(textdata); + return -1; + } + Py_DECREF(textdata); + } + } + Py_DECREF(class); + + PyObject* text = PyUnicode_Join(EMPTY, textlist); + if (!text) { + Py_DECREF(textlist); + return -1; + } + Py_DECREF(textlist); + + PyObject* stripped = PyObject_CallMethod(text, "strip", NULL); + if (!stripped) { + Py_DECREF(text); + return -1; + } + Py_DECREF(text); + + const char* unsafe_char; + PyObject* unsafe; + i = 0; + while (1) { + unsafe_char = unsafes[i]; + if (!unsafe_char) break; + + unsafe = PyUnicode_FromString(unsafe_char); + + if (!unsafe) { + Py_DECREF(stripped); + return -1; + } + + switch (PyUnicode_Contains(stripped, unsafe)) { + case -1: + Py_DECREF(stripped); + Py_DECREF(unsafe); + return -1; + case 0: + break; + case 1: + Py_DECREF(stripped); + Py_DECREF(unsafe); + Tokenizer_fail_route(self); + } + i++; + } + } + + return 0; } /* diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index d6c97c8..951e238 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -121,7 +121,7 @@ static PyObject* Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta); static int Tokenizer_parse_template_or_argument(Tokenizer* self); static int Tokenizer_parse_template(Tokenizer* self); static int Tokenizer_parse_argument(Tokenizer* self); -static int Tokenizer_verify_safe(Tokenizer* self, Py_UNICODE* unsafes[]); +static int Tokenizer_verify_safe(Tokenizer* self, const char* unsafes[]); static int Tokenizer_handle_template_param(Tokenizer* self); static int Tokenizer_handle_template_param_value(Tokenizer* self); static PyObject* Tokenizer_handle_template_end(Tokenizer* self); From 17af353fb652e01eb61584c0f5c6248edd17e9be Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 24 Sep 2012 21:18:44 -0400 Subject: [PATCH 012/180] Implement Tokenizer_handle_template_param(). 
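
The handler below repeats the usual four-step emission dance already seen in
Tokenizer_parse_template() and Tokenizer_parse_argument(): fetch the token
class off the tokens module, instantiate it, write it to the stack, and drop
both references. A hypothetical helper along these lines (not part of this
patch; it assumes the tokens, NOARGS, and NOKWARGS globals plus the
Tokenizer_write() prototype from tokenizer.h) would collapse that pattern:

    /* Hypothetical helper: emit a no-argument token of the named class,
       e.g. Tokenizer_emit_token(self, "TemplateParamSeparator"). */
    static int
    Tokenizer_emit_token(Tokenizer* self, const char* name)
    {
        PyObject* class = PyObject_GetAttrString(tokens, name);
        if (!class) return -1;
        PyObject* token = PyInstance_New(class, NOARGS, NOKWARGS);
        Py_DECREF(class);
        if (!token) return -1;
        if (Tokenizer_write(self, token)) {
            /* Tokenizer_write() does not steal the reference */
            Py_DECREF(token);
            return -1;
        }
        Py_DECREF(token);
        return 0;
    }
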
--- mwparserfromhell/parser/tokenizer.c | 62 +++++++++++++++++++++++++++++++------ 1 file changed, 53 insertions(+), 9 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 3d3b95f..3ab2437 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -206,7 +206,8 @@ Tokenizer_pop_keeping_context(Tokenizer* self) static void Tokenizer_fail_route(Tokenizer* self) { - Tokenizer_pop(self); + PyObject* stack = Tokenizer_pop(self); + Py_XDECREF(stack); longjmp(exception_env, BAD_ROUTE); } @@ -400,6 +401,7 @@ Tokenizer_parse_template_or_argument(Tokenizer* self) } PyObject* tokenlist = Tokenizer_pop(self); + if (!tokenlist) return -1; if (Tokenizer_write_all(self, tokenlist)) { Py_DECREF(tokenlist); return -1; @@ -543,10 +545,6 @@ Tokenizer_verify_safe(Tokenizer* self, const char* unsafes[]) for (i = 0; i < length; i++) { token = PySequence_Fast_GET_ITEM(stack, i); switch (PyObject_IsInstance(token, class)) { - case -1: - Py_DECREF(textlist); - Py_DECREF(class); - return -1; case 0: break; case 1: @@ -563,6 +561,11 @@ Tokenizer_verify_safe(Tokenizer* self, const char* unsafes[]) return -1; } Py_DECREF(textdata); + break; + case -1: + Py_DECREF(textlist); + Py_DECREF(class); + return -1; } } Py_DECREF(class); @@ -596,16 +599,17 @@ Tokenizer_verify_safe(Tokenizer* self, const char* unsafes[]) } switch (PyUnicode_Contains(stripped, unsafe)) { - case -1: - Py_DECREF(stripped); - Py_DECREF(unsafe); - return -1; case 0: break; case 1: Py_DECREF(stripped); Py_DECREF(unsafe); Tokenizer_fail_route(self); + break; + case -1: + Py_DECREF(stripped); + Py_DECREF(unsafe); + return -1; } i++; } @@ -620,7 +624,47 @@ Tokenizer_verify_safe(Tokenizer* self, const char* unsafes[]) static int Tokenizer_handle_template_param(Tokenizer* self) { + Py_ssize_t context = Tokenizer_CONTEXT_VAL(self); + if (context & LC_TEMPLATE_NAME) { + if (Tokenizer_verify_safe(self, {"\n", "{", "}", "[", "]"})) + return -1; + if (Tokenizer_set_context(self, context ^ LC_TEMPLATE_NAME)) + return -1; + } + else if (context & LC_TEMPLATE_PARAM_VALUE) { + if (Tokenizer_set_context(self, context ^ LC_TEMPLATE_PARAM_VALUE)) + return -1; + } + + if (context & LC_TEMPLATE_PARAM_KEY) { + PyObject* stack = Tokenizer_pop_keeping_context(self); + if (!stack) return -1; + if (Tokenizer_write_all(stack)) { + Py_DECREF(stack); + return -1; + } + Py_DECREF(stack); + } + else { + if (Tokenizer_set_context(self, context | LC_TEMPLATE_PARAM_KEY)) + return -1; + } + + class = PyObject_GetAttrString(tokens, "TemplateParamSeparator"); + if (!class) return -1; + token = PyInstance_New(class, NOARGS, NOKWARGS); + Py_DECREF(class); + if (!token) return -1; + + if (Tokenizer_write(self, token)) { + Py_DECREF(token); + return -1; + } + Py_DECREF(token); + + Tokenizer_push(self, Tokenizer_CONTEXT_VAL(self)); + return 0; } /* From 41535992a1a3488724435f4482642c6aa40bca45 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Tue, 25 Sep 2012 17:09:27 -0400 Subject: [PATCH 013/180] Implement Tokenizer_handle_template_param_value(). 
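
Note that the re-raise in Tokenizer_handle_template_param_value() below leans
on the single static jmp_buf from tokenizer.h, and that does not nest: by the
time a handler runs, exception_env still holds the context saved by that
handler's own setjmp(), so longjmp(exception_env, BAD_ROUTE) appears to jump
straight back into the same handler rather than out to the caller's check. One
way to make the idiom nest is to save and restore the caller's context
explicitly; a minimal sketch (Tokenizer_attempt() and its route callback are
hypothetical, and jmp_buf being an array type is why plain memcpy() works):

    #include <string.h>  /* for memcpy(); <setjmp.h> comes via tokenizer.h */

    /* Hypothetical wrapper: run *route*, rewinding the head and re-raising
       BAD_ROUTE to the caller's setjmp() if it fails. */
    static int
    Tokenizer_attempt(Tokenizer* self, int (*route)(Tokenizer*))
    {
        jmp_buf caller_env;
        Py_ssize_t reset = self->head;
        memcpy(caller_env, exception_env, sizeof(jmp_buf));

        if (setjmp(exception_env) == BAD_ROUTE) {
            /* "except BadRoute": rewind, then re-raise with the caller's
               context restored so the jump leaves this frame. */
            self->head = reset;
            memcpy(exception_env, caller_env, sizeof(jmp_buf));
            longjmp(exception_env, BAD_ROUTE);
        }
        return route(self);  /* may longjmp() back to the test above */
    }
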
--- mwparserfromhell/parser/tokenizer.c | 45 ++++++++++++++++++++++++++++++++---- mwparserfromhell/parser/tokenizer.py | 17 ++++++-------- 2 files changed, 48 insertions(+), 14 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 3ab2437..e7699fd 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -627,7 +627,8 @@ Tokenizer_handle_template_param(Tokenizer* self) Py_ssize_t context = Tokenizer_CONTEXT_VAL(self); if (context & LC_TEMPLATE_NAME) { - if (Tokenizer_verify_safe(self, {"\n", "{", "}", "[", "]"})) + const char* unsafes[] = {"\n", "{", "}", "[", "]"}; + if (Tokenizer_verify_safe(self, unsafes)) return -1; if (Tokenizer_set_context(self, context ^ LC_TEMPLATE_NAME)) return -1; @@ -640,7 +641,7 @@ Tokenizer_handle_template_param(Tokenizer* self) if (context & LC_TEMPLATE_PARAM_KEY) { PyObject* stack = Tokenizer_pop_keeping_context(self); if (!stack) return -1; - if (Tokenizer_write_all(stack)) { + if (Tokenizer_write_all(self, stack)) { Py_DECREF(stack); return -1; } @@ -651,9 +652,9 @@ Tokenizer_handle_template_param(Tokenizer* self) return -1; } - class = PyObject_GetAttrString(tokens, "TemplateParamSeparator"); + PyObject* class = PyObject_GetAttrString(tokens, "TemplateParamSeparator"); if (!class) return -1; - token = PyInstance_New(class, NOARGS, NOKWARGS); + PyObject* token = PyInstance_New(class, NOARGS, NOKWARGS); Py_DECREF(class); if (!token) return -1; @@ -673,7 +674,43 @@ Tokenizer_handle_template_param(Tokenizer* self) static int Tokenizer_handle_template_param_value(Tokenizer* self) { + if (setjmp(exception_env) == BAD_ROUTE) { + PyObject* stack = Tokenizer_pop(self); + Py_XDECREF(stack); + longjmp(exception_env, BAD_ROUTE); + } + + else { + const char* unsafes[] = {"\n", "{{", "}}"}; + if (Tokenizer_verify_safe(self, unsafes)) + return -1; + } + + PyObject* stack = Tokenizer_pop_keeping_context(self); + if (!stack) return -1; + if (Tokenizer_write_all(self, stack)) { + Py_DECREF(stack); + return -1; + } + Py_DECREF(stack); + Py_ssize_t context = Tokenizer_CONTEXT_VAL(self); + context ^= LC_TEMPLATE_PARAM_KEY; + context |= LC_TEMPLATE_PARAM_VALUE; + if (Tokenizer_set_context(self, context)) + return -1; + + PyObject* class = PyObject_GetAttrString(tokens, "TemplateParamEquals"); + if (!class) return -1; + PyObject* token = PyInstance_New(class, NOARGS, NOKWARGS); + Py_DECREF(class); + if (!token) return -1; + + if (Tokenizer_write(self, token)) { + Py_DECREF(token); + return -1; + } + Py_DECREF(token); } /* diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 364455d..508344e 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -197,10 +197,9 @@ class Tokenizer(object): except BadRoute: self._head = reset raise - else: - self._write_first(tokens.TemplateOpen()) - self._write_all(template) - self._write(tokens.TemplateClose()) + self._write_first(tokens.TemplateOpen()) + self._write_all(template) + self._write(tokens.TemplateClose()) def _parse_argument(self): """Parse an argument at the head of the wikicode string.""" @@ -210,10 +209,9 @@ class Tokenizer(object): except BadRoute: self._head = reset raise - else: - self._write_first(tokens.ArgumentOpen()) - self._write_all(argument) - self._write(tokens.ArgumentClose()) + self._write_first(tokens.ArgumentOpen()) + self._write_all(argument) + self._write(tokens.ArgumentClose()) def _verify_safe(self, unsafes): """Verify that there are no unsafe 
characters in the current stack. @@ -249,8 +247,7 @@ class Tokenizer(object): except BadRoute: self._pop() raise - else: - self._write_all(self._pop(keep_context=True)) + self._write_all(self._pop(keep_context=True)) self._context ^= contexts.TEMPLATE_PARAM_KEY self._context |= contexts.TEMPLATE_PARAM_VALUE self._write(tokens.TemplateParamEquals()) From f401ede179b469118ac936a8646e5f5a3be128d4 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Tue, 25 Sep 2012 17:32:43 -0400 Subject: [PATCH 014/180] Implementing more stuff. --- mwparserfromhell/parser/tokenizer.c | 84 +++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index e7699fd..b895f6c 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -259,6 +259,39 @@ Tokenizer_write_text(Tokenizer* self, PyObject* text) static int Tokenizer_write_all(Tokenizer* self, PyObject* tokenlist) { + if (PySequence_Fast_GET_SIZE(tokenlist) > 0) { + PyObject* token = PySequence_Fast_GET_ITEM(tokenlist, 0); + PyObject* class = PyObject_GetAttrString(tokens, "Text"); + if (!class) return -1; + + switch (PyObject_IsInstance(token, class)) { + case 0: + break; + case 1: + PyObject* text = PyObject_GetAttrString(token, "text"); + if (!text) { + Py_DECREF(class); + return -1; + } + if (PySequence_DelItem(tokenlist, 0)) { + Py_DECREF(text); + Py_DECREF(class); + return -1; + } + if (Tokenizer_write_text(self, text)) { + Py_DECREF(text); + Py_DECREF(class); + return -1; + } + Py_DECREF(text); + break + case -1: + Py_DECREF(class); + return -1; + } + Py_DECREF(class); + } + if (Tokenizer_push_textbuffer(self)) return -1; @@ -711,6 +744,7 @@ Tokenizer_handle_template_param_value(Tokenizer* self) return -1; } Py_DECREF(token); + return 0; } /* @@ -719,7 +753,27 @@ Tokenizer_handle_template_param_value(Tokenizer* self) static PyObject* Tokenizer_handle_template_end(Tokenizer* self) { + PyObject* stack; + Py_ssize_t context = Tokenizer_CONTEXT_VAL(self); + if (context & LC_TEMPLATE_NAME) { + const char* unsafes[] = {"\n", "{", "}", "[", "]"}; + if (Tokenizer_verify_safe(self, unsafes)) + return NULL; + } + else if (context & LC_TEMPLATE_PARAM_KEY) { + stack = Tokenizer_pop_keeping_context(self); + if (!stack) return NULL; + if (Tokenizer_write_all(self, stack)) { + Py_DECREF(stack); + return NULL; + } + Py_DECREF(stack); + } + + self->head++; + stack = Tokenizer_pop(self); + return stack; } /* @@ -728,7 +782,28 @@ Tokenizer_handle_template_end(Tokenizer* self) static int Tokenizer_handle_argument_separator(Tokenizer* self) { + const char* unsafes[] = {"\n", "{{", "}}"}; + if (Tokenizer_verify_safe(self, unsafes)) + return -1; + + Py_ssize_t context = Tokenizer_CONTEXT_VAL(self); + context ^= LC_ARGUMENT_NAME; + context |= LC_ARGUMENT_DEFAULT; + if (Tokenizer_set_context(self, context)) + return -1; + + PyObject* class = PyObject_GetAttrString(tokens, "ArgumentSeparator"); + if (!class) return -1; + PyObject* token = PyInstance_New(class, NOARGS, NOKWARGS); + Py_DECREF(class); + if (!token) return -1; + if (Tokenizer_write(self, token)) { + Py_DECREF(token); + return -1; + } + Py_DECREF(token); + return 0; } /* @@ -737,7 +812,16 @@ Tokenizer_handle_argument_separator(Tokenizer* self) static PyObject* Tokenizer_handle_argument_end(Tokenizer* self) { + Py_ssize_t context = Tokenizer_CONTEXT_VAL(self); + if (context & LC_ARGUMENT_NAME) { + const char* unsafes[] = {"\n", "{{", "}}"}; + if (Tokenizer_verify_safe(self, unsafes)) + 
return NULL; + } + self->head += 2; + PyObject* stack = Tokenizer_pop(self); + return stack; } /* From 707ecc383740165096d74c471e5f1b739f752f71 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Tue, 25 Sep 2012 17:51:23 -0400 Subject: [PATCH 015/180] Implement Tokenizer_parse_wikilink() and more. --- mwparserfromhell/parser/tokenizer.c | 83 ++++++++++++++++++++++++++++++++++--- 1 file changed, 77 insertions(+), 6 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index b895f6c..9068d94 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -264,11 +264,12 @@ Tokenizer_write_all(Tokenizer* self, PyObject* tokenlist) PyObject* class = PyObject_GetAttrString(tokens, "Text"); if (!class) return -1; + PyObject* text; switch (PyObject_IsInstance(token, class)) { case 0: break; case 1: - PyObject* text = PyObject_GetAttrString(token, "text"); + text = PyObject_GetAttrString(token, "text"); if (!text) { Py_DECREF(class); return -1; @@ -284,7 +285,7 @@ Tokenizer_write_all(Tokenizer* self, PyObject* tokenlist) return -1; } Py_DECREF(text); - break + break; case -1: Py_DECREF(class); return -1; @@ -463,13 +464,20 @@ Tokenizer_parse_template(Tokenizer* self) if (!template) return -1; class = PyObject_GetAttrString(tokens, "TemplateOpen"); - if (!class) return -1; + if (!class) { + Py_DECREF(template); + return -1; + } token = PyInstance_New(class, NOARGS, NOKWARGS); Py_DECREF(class); - if (!token) return -1; + if (!token) { + Py_DECREF(template); + return -1; + } if (Tokenizer_write_first(self, token)) { Py_DECREF(token); + Py_DECREF(template); return -1; } Py_DECREF(token); @@ -515,13 +523,20 @@ Tokenizer_parse_argument(Tokenizer* self) if (!argument) return -1; class = PyObject_GetAttrString(tokens, "ArgumentOpen"); - if (!class) return -1; + if (!class) { + Py_DECREF(argument); + return -1; + } token = PyInstance_New(class, NOARGS, NOKWARGS); Py_DECREF(class); - if (!token) return -1; + if (!token) { + Py_DECREF(argument); + return -1; + } if (Tokenizer_write_first(self, token)) { Py_DECREF(token); + Py_DECREF(argument); return -1; } Py_DECREF(token); @@ -830,7 +845,63 @@ Tokenizer_handle_argument_end(Tokenizer* self) static int Tokenizer_parse_wikilink(Tokenizer* self) { + self->head += 2; + Py_ssize_t reset = self->head - 1; + + if (setjmp(exception_env) == BAD_ROUTE) { + self->head = reset; + PyObject* text = PyUnicode_FromString("[["); + if (!text) return -1; + if (Tokenizer_write_text(self, text)) { + Py_XDECREF(text); + return -1; + } + } + + else { + PyObject *class, *token; + PyObject *wikilink = Tokenizer_parse(self, LC_WIKILINK_TITLE); + if (!wikilink) return -1; + + class = PyObject_GetAttrString(tokens, "WikilinkOpen"); + if (!class) { + Py_DECREF(wikilink); + return -1; + } + token = PyInstance_New(class, NOARGS, NOKWARGS); + Py_DECREF(class); + if (!token) { + Py_DECREF(wikilink); + return -1; + } + + if (Tokenizer_write(self, token)) { + Py_DECREF(token); + Py_DECREF(wikilink); + return -1; + } + Py_DECREF(token); + if (Tokenizer_write_all(self, wikilink)) { + Py_DECREF(wikilink); + return -1; + } + Py_DECREF(wikilink); + + class = PyObject_GetAttrString(tokens, "WikilinkClose"); + if (!class) return -1; + token = PyInstance_New(class, NOARGS, NOKWARGS); + Py_DECREF(class); + if (!token) return -1; + + if (Tokenizer_write(self, token)) { + Py_DECREF(token); + return -1; + } + Py_DECREF(token); + } + + return 0; } /* From 7c29a2a65e253ad5a9473fe7fc65786666889d1a Mon Sep 17 00:00:00 2001 From: Ben 
Kurtovic Date: Tue, 25 Sep 2012 17:54:38 -0400 Subject: [PATCH 016/180] Implement Tokenizer_handle_wikilink_separator()/_end(). --- mwparserfromhell/parser/tokenizer.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 9068d94..907c55e 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -827,8 +827,7 @@ Tokenizer_handle_argument_separator(Tokenizer* self) static PyObject* Tokenizer_handle_argument_end(Tokenizer* self) { - Py_ssize_t context = Tokenizer_CONTEXT_VAL(self); - if (context & LC_ARGUMENT_NAME) { + if (Tokenizer_CONTEXT_VAL(self) & LC_ARGUMENT_NAME) { const char* unsafes[] = {"\n", "{{", "}}"}; if (Tokenizer_verify_safe(self, unsafes)) return NULL; @@ -910,7 +909,28 @@ Tokenizer_parse_wikilink(Tokenizer* self) static int Tokenizer_handle_wikilink_separator(Tokenizer* self) { + const char* unsafes[] = {"\n", "{", "}", "[", "]"}; + if (Tokenizer_verify_safe(self, unsafes)) + return -1; + + Py_ssize_t context = Tokenizer_CONTEXT_VAL(self); + context ^= LC_WIKILINK_TITLE; + context |= LC_WIKILINK_TEXT; + if (Tokenizer_set_context(self, context)) + return -1; + + PyObject* class = PyObject_GetAttrString(tokens, "WikilinkSeparator"); + if (!class) return -1; + PyObject* token = PyInstance_New(class, NOARGS, NOKWARGS); + Py_DECREF(class); + if (!token) return -1; + if (Tokenizer_write(self, token)) { + Py_DECREF(token); + return -1; + } + Py_DECREF(token); + return 0; } /* @@ -919,7 +939,15 @@ Tokenizer_handle_wikilink_separator(Tokenizer* self) static PyObject* Tokenizer_handle_wikilink_end(Tokenizer* self) { + if (Tokenizer_CONTEXT_VAL(self) & LC_WIKILINK_TITLE) { + const char* unsafes[] = {"\n", "{", "}", "[", "]"}; + if (Tokenizer_verify_safe(self, unsafes)) + return NULL; + } + self->head += 1; + PyObject* stack = Tokenizer_pop(self); + return stack; } /* From 150f3311290a8569eb960084e070eb23f6e70c3c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Tue, 25 Sep 2012 18:11:29 -0400 Subject: [PATCH 017/180] Implement Tokenizer_parse_entity(), Tokenizer_parse_comment(). 
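
Both functions below follow the attempt-and-fallback shape already used by
Tokenizer_parse_wikilink(): remember where the head was, try the route, and on
BAD_ROUTE rewind and replay the markup as plain text. Stripped of the per-case
details (the entity version pushes its own stack first, and each caller writes
its own Open/Close tokens), the shared shape is roughly this sketch, where the
context value and fallback string are illustrative parameters rather than
anything in the actual patch:

    /* Sketch of the shared attempt-and-fallback shape.  *fallback* is the
       literal markup to replay as text when the route fails, e.g. "[[". */
    static int
    Tokenizer_attempt_route(Tokenizer* self, Py_ssize_t context,
                            const char* fallback)
    {
        Py_ssize_t reset = self->head;
        if (setjmp(exception_env) == BAD_ROUTE) {
            self->head = reset;
            PyObject* text = PyUnicode_FromString(fallback);
            if (!text) return -1;
            if (Tokenizer_write_text(self, text)) {
                Py_DECREF(text);
                return -1;
            }
            Py_DECREF(text);
            return 0;
        }
        PyObject* tokenlist = Tokenizer_parse(self, context);
        if (!tokenlist) return -1;
        if (Tokenizer_write_all(self, tokenlist)) {
            Py_DECREF(tokenlist);
            return -1;
        }
        Py_DECREF(tokenlist);
        return 0;
    }
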
--- mwparserfromhell/parser/tokenizer.c | 79 ++++++++++++++++++++++++++++++++++++- 1 file changed, 77 insertions(+), 2 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 907c55e..d302ea2 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -856,7 +856,6 @@ Tokenizer_parse_wikilink(Tokenizer* self) return -1; } } - else { PyObject *class, *token; PyObject *wikilink = Tokenizer_parse(self, LC_WIKILINK_TITLE); @@ -899,7 +898,6 @@ Tokenizer_parse_wikilink(Tokenizer* self) } Py_DECREF(token); } - return 0; } @@ -983,7 +981,29 @@ Tokenizer_really_parse_entity(Tokenizer* self) static int Tokenizer_parse_entity(Tokenizer* self) { + Py_ssize_t reset = self->head; + if (Tokenizer_push(self, 0)) + return -1; + if (setjmp(exception_env) == BAD_ROUTE) { + self->head = reset; + if (Tokenizer_write_text(self, Tokenizer_read(self, 0))) + return -1; + } + else { + if (Tokenizer_really_parse_entity(self)) + return -1; + + PyObject* tokenlist = Tokenizer_pop(self); + if (!tokenlist) return -1; + if (Tokenizer_write_all(self, tokenlist)) { + Py_DECREF(tokenlist); + return -1; + } + + Py_DECREF(tokenlist); + } + return 0; } /* @@ -992,7 +1012,62 @@ Tokenizer_parse_entity(Tokenizer* self) static int Tokenizer_parse_comment(Tokenizer* self) { + self->head += 4; + Py_ssize_t reset = self->head - 1; + if (setjmp(exception_env) == BAD_ROUTE) { + self->head = reset; + PyObject* text = PyUnicode_FromString("``.""" + def __init__(self, contents): super(Comment, self).__init__() self._contents = contents diff --git a/mwparserfromhell/nodes/text.py b/mwparserfromhell/nodes/text.py index 60ba847..6fda3da 100644 --- a/mwparserfromhell/nodes/text.py +++ b/mwparserfromhell/nodes/text.py @@ -29,6 +29,7 @@ __all__ = ["Text"] class Text(Node): """Represents ordinary, unformatted text with no special properties.""" + def __init__(self, value): super(Text, self).__init__() self._value = value diff --git a/mwparserfromhell/nodes/wikilink.py b/mwparserfromhell/nodes/wikilink.py index f880016..6fea468 100644 --- a/mwparserfromhell/nodes/wikilink.py +++ b/mwparserfromhell/nodes/wikilink.py @@ -30,6 +30,7 @@ __all__ = ["Wikilink"] class Wikilink(Node): """Represents an internal wikilink, like ``[[Foo|Bar]]``.""" + def __init__(self, title, text=None): super(Wikilink, self).__init__() self._title = title diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index efd28d8..eee58b9 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -50,6 +50,7 @@ class StringMixIn(object): :py:meth:`__unicode__` instead of the immutable ``self`` like the regular ``str`` type. """ + if py3k: def __str__(self): return self.__unicode__() diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 4d12dc9..379b4fa 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -38,6 +38,7 @@ class TokenizerTestCase(object): TestCTokenizer. Tests are loaded dynamically from files in the 'tokenizer' directory. """ + @classmethod def _build_test_method(cls, funcname, data): """Create and return a method to be treated as a test case method. 
diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index 07b5290..4dbeceb 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -27,6 +27,7 @@ from _test_tokenizer import TokenizerTestCase class TestCTokenizer(TokenizerTestCase, unittest.TestCase): """Test cases for the C tokenizer.""" + @classmethod def setUpClass(cls): from mwparserfromhell.parser._tokenizer import CTokenizer diff --git a/tests/test_docs.py b/tests/test_docs.py index 8673cb9..075b0a7 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -30,6 +30,7 @@ from mwparserfromhell.compat import py3k, str, StringIO class TestDocs(unittest.TestCase): """Integration test cases for mwparserfromhell's documentation.""" + def assertPrint(self, input, output): """Assertion check that *input*, when printed, produces *output*.""" buff = StringIO() diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index a2f2482..73e6fe7 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -27,6 +27,7 @@ from _test_tokenizer import TokenizerTestCase class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): """Test cases for the Python tokenizer.""" + @classmethod def setUpClass(cls): from mwparserfromhell.parser.tokenizer import Tokenizer diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index b0a10cb..f6d22ae 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -384,7 +384,5 @@ class TestSmartList(unittest.TestCase): self.assertEquals([4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], child1) self.assertEquals([4, 3, 2, 1.9, 1.8], child2) - # also test whether children that exit scope are removed from parent's map - if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 8d86c8e..7b99995 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -37,6 +37,7 @@ class _FakeString(StringMixIn): class TestStringMixIn(unittest.TestCase): """Test cases for the StringMixIn class.""" + def test_docs(self): """make sure the various methods of StringMixIn have docstrings""" methods = [ From 6a741db7ce98239108f21004b2a9d2f99a63f90f Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 25 Mar 2013 18:25:03 -0400 Subject: [PATCH 106/180] Applying fb71f5507eca7bc73fae764549a7579889817cba --- mwparserfromhell/parser/__init__.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/mwparserfromhell/parser/__init__.py b/mwparserfromhell/parser/__init__.py index 074b9ba..1fb95b5 100644 --- a/mwparserfromhell/parser/__init__.py +++ b/mwparserfromhell/parser/__init__.py @@ -26,16 +26,16 @@ modules: the :py:mod:`~.tokenizer` and the :py:mod:`~.builder`. This module joins them together under one interface. """ +from .builder import Builder +from .tokenizer import Tokenizer try: - from ._builder import CBuilder as Builder + from ._tokenizer import CTokenizer + use_c = True except ImportError: - from .builder import Builder -try: - from ._tokenizer import CTokenizer as Tokenizer -except ImportError: - from .tokenizer import Tokenizer + CTokenizer = None + use_c = False -__all__ = ["Parser"] +__all__ = ["use_c", "Parser"] class Parser(object): """Represents a parser for wikicode. 
@@ -48,7 +48,10 @@ class Parser(object): def __init__(self, text): self.text = text - self._tokenizer = Tokenizer() + if use_c and CTokenizer: + self._tokenizer = CTokenizer() + else: + self._tokenizer = Tokenizer() self._builder = Builder() def parse(self): From 9e26264d6b8d462cd93bc4c475c91abfe6d3b501 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 25 Mar 2013 19:13:32 -0400 Subject: [PATCH 107/180] Replace deprecated alias assertEquals() with assertEqual(). --- tests/test_smart_list.py | 244 ++++++++++++++++++++++----------------------- tests/test_string_mixin.py | 228 +++++++++++++++++++++--------------------- tests/test_tokens.py | 24 ++--- 3 files changed, 248 insertions(+), 248 deletions(-) diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index f6d22ae..680de9d 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -39,80 +39,80 @@ class TestSmartList(unittest.TestCase): list1 = builder([0, 1, 2, 3, "one", "two"]) list2 = builder(list(range(10))) - self.assertEquals(1, list1[1]) - self.assertEquals("one", list1[-2]) - self.assertEquals([2, 3], list1[2:4]) + self.assertEqual(1, list1[1]) + self.assertEqual("one", list1[-2]) + self.assertEqual([2, 3], list1[2:4]) self.assertRaises(IndexError, lambda: list1[6]) self.assertRaises(IndexError, lambda: list1[-7]) - self.assertEquals([0, 1, 2], list1[:3]) - self.assertEquals([0, 1, 2, 3, "one", "two"], list1[:]) - self.assertEquals([3, "one", "two"], list1[3:]) - self.assertEquals(["one", "two"], list1[-2:]) - self.assertEquals([0, 1], list1[:-4]) - self.assertEquals([], list1[6:]) - self.assertEquals([], list1[4:2]) - - self.assertEquals([0, 2, "one"], list1[0:5:2]) - self.assertEquals([0, 2], list1[0:-3:2]) - self.assertEquals([0, 1, 2, 3, "one", "two"], list1[::]) - self.assertEquals([2, 3, "one", "two"], list1[2::]) - self.assertEquals([0, 1, 2, 3], list1[:4:]) - self.assertEquals([2, 3], list1[2:4:]) - self.assertEquals([0, 2, 4, 6, 8], list2[::2]) - self.assertEquals([2, 5, 8], list2[2::3]) - self.assertEquals([0, 3], list2[:6:3]) - self.assertEquals([2, 5, 8], list2[-8:9:3]) - self.assertEquals([], list2[100000:1000:-100]) + self.assertEqual([0, 1, 2], list1[:3]) + self.assertEqual([0, 1, 2, 3, "one", "two"], list1[:]) + self.assertEqual([3, "one", "two"], list1[3:]) + self.assertEqual(["one", "two"], list1[-2:]) + self.assertEqual([0, 1], list1[:-4]) + self.assertEqual([], list1[6:]) + self.assertEqual([], list1[4:2]) + + self.assertEqual([0, 2, "one"], list1[0:5:2]) + self.assertEqual([0, 2], list1[0:-3:2]) + self.assertEqual([0, 1, 2, 3, "one", "two"], list1[::]) + self.assertEqual([2, 3, "one", "two"], list1[2::]) + self.assertEqual([0, 1, 2, 3], list1[:4:]) + self.assertEqual([2, 3], list1[2:4:]) + self.assertEqual([0, 2, 4, 6, 8], list2[::2]) + self.assertEqual([2, 5, 8], list2[2::3]) + self.assertEqual([0, 3], list2[:6:3]) + self.assertEqual([2, 5, 8], list2[-8:9:3]) + self.assertEqual([], list2[100000:1000:-100]) list1[3] = 100 - self.assertEquals(100, list1[3]) + self.assertEqual(100, list1[3]) list1[-3] = 101 - self.assertEquals([0, 1, 2, 101, "one", "two"], list1) + self.assertEqual([0, 1, 2, 101, "one", "two"], list1) list1[5:] = [6, 7, 8] - self.assertEquals([6, 7, 8], list1[5:]) - self.assertEquals([0, 1, 2, 101, "one", 6, 7, 8], list1) + self.assertEqual([6, 7, 8], list1[5:]) + self.assertEqual([0, 1, 2, 101, "one", 6, 7, 8], list1) list1[2:4] = [-1, -2, -3, -4, -5] - self.assertEquals([0, 1, -1, -2, -3, -4, -5, "one", 6, 7, 8], list1) + self.assertEqual([0, 1, -1, -2, -3, -4, 
-5, "one", 6, 7, 8], list1) list1[0:-3] = [99] - self.assertEquals([99, 6, 7, 8], list1) + self.assertEqual([99, 6, 7, 8], list1) list2[0:6:2] = [100, 102, 104] - self.assertEquals([100, 1, 102, 3, 104, 5, 6, 7, 8, 9], list2) + self.assertEqual([100, 1, 102, 3, 104, 5, 6, 7, 8, 9], list2) list2[::3] = [200, 203, 206, 209] - self.assertEquals([200, 1, 102, 203, 104, 5, 206, 7, 8, 209], list2) + self.assertEqual([200, 1, 102, 203, 104, 5, 206, 7, 8, 209], list2) list2[::] = range(7) - self.assertEquals([0, 1, 2, 3, 4, 5, 6], list2) + self.assertEqual([0, 1, 2, 3, 4, 5, 6], list2) self.assertRaises(ValueError, assign, list2, 0, 5, 2, [100, 102, 104, 106]) del list2[2] - self.assertEquals([0, 1, 3, 4, 5, 6], list2) + self.assertEqual([0, 1, 3, 4, 5, 6], list2) del list2[-3] - self.assertEquals([0, 1, 3, 5, 6], list2) + self.assertEqual([0, 1, 3, 5, 6], list2) self.assertRaises(IndexError, delete, list2, 100) self.assertRaises(IndexError, delete, list2, -6) list2[:] = range(10) del list2[3:6] - self.assertEquals([0, 1, 2, 6, 7, 8, 9], list2) + self.assertEqual([0, 1, 2, 6, 7, 8, 9], list2) del list2[-2:] - self.assertEquals([0, 1, 2, 6, 7], list2) + self.assertEqual([0, 1, 2, 6, 7], list2) del list2[:2] - self.assertEquals([2, 6, 7], list2) + self.assertEqual([2, 6, 7], list2) list2[:] = range(10) del list2[2:8:2] - self.assertEquals([0, 1, 3, 5, 7, 8, 9], list2) + self.assertEqual([0, 1, 3, 5, 7, 8, 9], list2) def _test_add_radd_iadd(self, builder): """Run tests on __r/i/add__ of a list built with *builder*.""" list1 = builder(range(5)) list2 = builder(range(5, 10)) - self.assertEquals([0, 1, 2, 3, 4, 5, 6], list1 + [5, 6]) - self.assertEquals([0, 1, 2, 3, 4], list1) - self.assertEquals(list(range(10)), list1 + list2) - self.assertEquals([-2, -1, 0, 1, 2, 3, 4], [-2, -1] + list1) - self.assertEquals([0, 1, 2, 3, 4], list1) + self.assertEqual([0, 1, 2, 3, 4, 5, 6], list1 + [5, 6]) + self.assertEqual([0, 1, 2, 3, 4], list1) + self.assertEqual(list(range(10)), list1 + list2) + self.assertEqual([-2, -1, 0, 1, 2, 3, 4], [-2, -1] + list1) + self.assertEqual([0, 1, 2, 3, 4], list1) list1 += ["foo", "bar", "baz"] - self.assertEquals([0, 1, 2, 3, 4, "foo", "bar", "baz"], list1) + self.assertEqual([0, 1, 2, 3, 4, "foo", "bar", "baz"], list1) def _test_other_magic_methods(self, builder): """Run tests on other magic methods of a list built with *builder*.""" @@ -122,13 +122,13 @@ class TestSmartList(unittest.TestCase): list4 = builder([0, 1, 2]) if py3k: - self.assertEquals("[0, 1, 2, 3, 'one', 'two']", str(list1)) - self.assertEquals(b"[0, 1, 2, 3, 'one', 'two']", bytes(list1)) - self.assertEquals("[0, 1, 2, 3, 'one', 'two']", repr(list1)) + self.assertEqual("[0, 1, 2, 3, 'one', 'two']", str(list1)) + self.assertEqual(b"[0, 1, 2, 3, 'one', 'two']", bytes(list1)) + self.assertEqual("[0, 1, 2, 3, 'one', 'two']", repr(list1)) else: - self.assertEquals("[0, 1, 2, 3, u'one', u'two']", unicode(list1)) - self.assertEquals(b"[0, 1, 2, 3, u'one', u'two']", str(list1)) - self.assertEquals(b"[0, 1, 2, 3, u'one', u'two']", repr(list1)) + self.assertEqual("[0, 1, 2, 3, u'one', u'two']", unicode(list1)) + self.assertEqual(b"[0, 1, 2, 3, u'one', u'two']", str(list1)) + self.assertEqual(b"[0, 1, 2, 3, u'one', u'two']", repr(list1)) self.assertTrue(list1 < list3) self.assertTrue(list1 <= list3) @@ -164,42 +164,42 @@ class TestSmartList(unittest.TestCase): self.assertTrue(bool(list1)) self.assertFalse(bool(list2)) - self.assertEquals(6, len(list1)) - self.assertEquals(0, len(list2)) + self.assertEqual(6, len(list1)) + 
self.assertEqual(0, len(list2)) out = [] for obj in list1: out.append(obj) - self.assertEquals([0, 1, 2, 3, "one", "two"], out) + self.assertEqual([0, 1, 2, 3, "one", "two"], out) out = [] for ch in list2: out.append(ch) - self.assertEquals([], out) + self.assertEqual([], out) gen1 = iter(list1) out = [] for i in range(len(list1)): out.append(gen1.next()) self.assertRaises(StopIteration, gen1.next) - self.assertEquals([0, 1, 2, 3, "one", "two"], out) + self.assertEqual([0, 1, 2, 3, "one", "two"], out) gen2 = iter(list2) self.assertRaises(StopIteration, gen2.next) - self.assertEquals(["two", "one", 3, 2, 1, 0], list(reversed(list1))) - self.assertEquals([], list(reversed(list2))) + self.assertEqual(["two", "one", 3, 2, 1, 0], list(reversed(list1))) + self.assertEqual([], list(reversed(list2))) self.assertTrue("one" in list1) self.assertTrue(3 in list1) self.assertFalse(10 in list1) self.assertFalse(0 in list2) - self.assertEquals([], list2 * 5) - self.assertEquals([], 5 * list2) - self.assertEquals([0, 1, 2, 0, 1, 2, 0, 1, 2], list4 * 3) - self.assertEquals([0, 1, 2, 0, 1, 2, 0, 1, 2], 3 * list4) + self.assertEqual([], list2 * 5) + self.assertEqual([], 5 * list2) + self.assertEqual([0, 1, 2, 0, 1, 2, 0, 1, 2], list4 * 3) + self.assertEqual([0, 1, 2, 0, 1, 2, 0, 1, 2], 3 * list4) list4 *= 2 - self.assertEquals([0, 1, 2, 0, 1, 2], list4) + self.assertEqual([0, 1, 2, 0, 1, 2], list4) def _test_list_methods(self, builder): """Run tests on the public methods of a list built with *builder*.""" @@ -210,60 +210,60 @@ class TestSmartList(unittest.TestCase): list1.append(5) list1.append(1) list1.append(2) - self.assertEquals([0, 1, 2, 3, 4, 5, 1, 2], list1) + self.assertEqual([0, 1, 2, 3, 4, 5, 1, 2], list1) - self.assertEquals(0, list1.count(6)) - self.assertEquals(2, list1.count(1)) + self.assertEqual(0, list1.count(6)) + self.assertEqual(2, list1.count(1)) list1.extend(range(5, 8)) - self.assertEquals([0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) + self.assertEqual([0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) - self.assertEquals(1, list1.index(1)) - self.assertEquals(6, list1.index(1, 3)) - self.assertEquals(6, list1.index(1, 3, 7)) + self.assertEqual(1, list1.index(1)) + self.assertEqual(6, list1.index(1, 3)) + self.assertEqual(6, list1.index(1, 3, 7)) self.assertRaises(ValueError, list1.index, 1, 3, 5) list1.insert(0, -1) - self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) + self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) list1.insert(-1, 6.5) - self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7], list1) + self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7], list1) list1.insert(13, 8) - self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7, 8], list1) - - self.assertEquals(8, list1.pop()) - self.assertEquals(7, list1.pop()) - self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5], list1) - self.assertEquals(-1, list1.pop(0)) - self.assertEquals(5, list1.pop(5)) - self.assertEquals(6.5, list1.pop(-1)) - self.assertEquals([0, 1, 2, 3, 4, 1, 2, 5, 6], list1) - self.assertEquals("foo", list2.pop()) + self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7, 8], list1) + + self.assertEqual(8, list1.pop()) + self.assertEqual(7, list1.pop()) + self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5], list1) + self.assertEqual(-1, list1.pop(0)) + self.assertEqual(5, list1.pop(5)) + self.assertEqual(6.5, list1.pop(-1)) + self.assertEqual([0, 1, 2, 3, 4, 1, 2, 5, 6], list1) + self.assertEqual("foo", list2.pop()) self.assertRaises(IndexError, list2.pop) - 
self.assertEquals([], list2) + self.assertEqual([], list2) list1.remove(6) - self.assertEquals([0, 1, 2, 3, 4, 1, 2, 5], list1) + self.assertEqual([0, 1, 2, 3, 4, 1, 2, 5], list1) list1.remove(1) - self.assertEquals([0, 2, 3, 4, 1, 2, 5], list1) + self.assertEqual([0, 2, 3, 4, 1, 2, 5], list1) list1.remove(1) - self.assertEquals([0, 2, 3, 4, 2, 5], list1) + self.assertEqual([0, 2, 3, 4, 2, 5], list1) self.assertRaises(ValueError, list1.remove, 1) list1.reverse() - self.assertEquals([5, 2, 4, 3, 2, 0], list1) + self.assertEqual([5, 2, 4, 3, 2, 0], list1) list1.sort() - self.assertEquals([0, 2, 2, 3, 4, 5], list1) + self.assertEqual([0, 2, 2, 3, 4, 5], list1) list1.sort(reverse=True) - self.assertEquals([5, 4, 3, 2, 2, 0], list1) + self.assertEqual([5, 4, 3, 2, 2, 0], list1) list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - y)) # Distance from 3 - self.assertEquals([3, 4, 2, 2, 5, 0], list1) + self.assertEqual([3, 4, 2, 2, 5, 0], list1) list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - y), reverse=True) - self.assertEquals([0, 5, 4, 2, 2, 3], list1) + self.assertEqual([0, 5, 4, 2, 2, 3], list1) list3.sort(key=lambda i: i[1]) - self.assertEquals([("d", 2), ("c", 3), ("a", 5), ("b", 8)], list3) + self.assertEqual([("d", 2), ("c", 3), ("a", 5), ("b", 8)], list3) list3.sort(key=lambda i: i[1], reverse=True) - self.assertEquals([("b", 8), ("a", 5), ("c", 3), ("d", 2)], list3) + self.assertEqual([("b", 8), ("a", 5), ("c", 3), ("d", 2)], list3) def test_docs(self): """make sure the methods of SmartList/_ListProxy have docstrings""" @@ -273,18 +273,18 @@ class TestSmartList(unittest.TestCase): expected = getattr(list, meth).__doc__ smartlist_doc = getattr(SmartList, meth).__doc__ listproxy_doc = getattr(_ListProxy, meth).__doc__ - self.assertEquals(expected, smartlist_doc) - self.assertEquals(expected, listproxy_doc) + self.assertEqual(expected, smartlist_doc) + self.assertEqual(expected, listproxy_doc) def test_doctest(self): """make sure the test embedded in SmartList's docstring passes""" parent = SmartList([0, 1, 2, 3]) - self.assertEquals([0, 1, 2, 3], parent) + self.assertEqual([0, 1, 2, 3], parent) child = parent[2:] - self.assertEquals([2, 3], child) + self.assertEqual([2, 3], child) child.append(4) - self.assertEquals([2, 3, 4], child) - self.assertEquals([0, 1, 2, 3, 4], parent) + self.assertEqual([2, 3, 4], child) + self.assertEqual([0, 1, 2, 3, 4], parent) def test_parent_get_set_del(self): """make sure SmartList's getitem/setitem/delitem work""" @@ -343,46 +343,46 @@ class TestSmartList(unittest.TestCase): parent.append(6) child1.append(7) child2.append(4.5) - self.assertEquals([0, 1, 2, 3, 4, 4.5, 5, 6, 7], parent) - self.assertEquals([2, 3, 4, 4.5, 5, 6, 7], child1) - self.assertEquals([2, 3, 4, 4.5], child2) + self.assertEqual([0, 1, 2, 3, 4, 4.5, 5, 6, 7], parent) + self.assertEqual([2, 3, 4, 4.5, 5, 6, 7], child1) + self.assertEqual([2, 3, 4, 4.5], child2) parent.insert(0, -1) parent.insert(4, 2.5) parent.insert(10, 6.5) - self.assertEquals([-1, 0, 1, 2, 2.5, 3, 4, 4.5, 5, 6, 6.5, 7], parent) - self.assertEquals([2, 2.5, 3, 4, 4.5, 5, 6, 6.5, 7], child1) - self.assertEquals([2, 2.5, 3, 4, 4.5], child2) + self.assertEqual([-1, 0, 1, 2, 2.5, 3, 4, 4.5, 5, 6, 6.5, 7], parent) + self.assertEqual([2, 2.5, 3, 4, 4.5, 5, 6, 6.5, 7], child1) + self.assertEqual([2, 2.5, 3, 4, 4.5], child2) - self.assertEquals(7, parent.pop()) - self.assertEquals(6.5, child1.pop()) - self.assertEquals(4.5, child2.pop()) - self.assertEquals([-1, 0, 1, 2, 2.5, 3, 4, 5, 6], parent) - self.assertEquals([2, 2.5, 3, 
4, 5, 6], child1) - self.assertEquals([2, 2.5, 3, 4], child2) + self.assertEqual(7, parent.pop()) + self.assertEqual(6.5, child1.pop()) + self.assertEqual(4.5, child2.pop()) + self.assertEqual([-1, 0, 1, 2, 2.5, 3, 4, 5, 6], parent) + self.assertEqual([2, 2.5, 3, 4, 5, 6], child1) + self.assertEqual([2, 2.5, 3, 4], child2) parent.remove(-1) child1.remove(2.5) - self.assertEquals([0, 1, 2, 3, 4, 5, 6], parent) - self.assertEquals([2, 3, 4, 5, 6], child1) - self.assertEquals([2, 3, 4], child2) + self.assertEqual([0, 1, 2, 3, 4, 5, 6], parent) + self.assertEqual([2, 3, 4, 5, 6], child1) + self.assertEqual([2, 3, 4], child2) - self.assertEquals(0, parent.pop(0)) - self.assertEquals([1, 2, 3, 4, 5, 6], parent) - self.assertEquals([2, 3, 4, 5, 6], child1) - self.assertEquals([2, 3, 4], child2) + self.assertEqual(0, parent.pop(0)) + self.assertEqual([1, 2, 3, 4, 5, 6], parent) + self.assertEqual([2, 3, 4, 5, 6], child1) + self.assertEqual([2, 3, 4], child2) child2.reverse() - self.assertEquals([1, 4, 3, 2, 5, 6], parent) - self.assertEquals([4, 3, 2, 5, 6], child1) - self.assertEquals([4, 3, 2], child2) + self.assertEqual([1, 4, 3, 2, 5, 6], parent) + self.assertEqual([4, 3, 2, 5, 6], child1) + self.assertEqual([4, 3, 2], child2) parent.extend([7, 8]) child1.extend([8.1, 8.2]) child2.extend([1.9, 1.8]) - self.assertEquals([1, 4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], parent) - self.assertEquals([4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], child1) - self.assertEquals([4, 3, 2, 1.9, 1.8], child2) + self.assertEqual([1, 4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], parent) + self.assertEqual([4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], child1) + self.assertEqual([4, 3, 2, 1.9, 1.8], child2) if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 7b99995..6ef6344 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -56,17 +56,17 @@ class TestStringMixIn(unittest.TestCase): for meth in methods: expected = getattr(str, meth).__doc__ actual = getattr(StringMixIn, meth).__doc__ - self.assertEquals(expected, actual) + self.assertEqual(expected, actual) def test_types(self): """make sure StringMixIns convert to different types correctly""" fstr = _FakeString("fake string") - self.assertEquals(str(fstr), "fake string") - self.assertEquals(bytes(fstr), b"fake string") + self.assertEqual(str(fstr), "fake string") + self.assertEqual(bytes(fstr), b"fake string") if py3k: - self.assertEquals(repr(fstr), "'fake string'") + self.assertEqual(repr(fstr), "'fake string'") else: - self.assertEquals(repr(fstr), b"u'fake string'") + self.assertEqual(repr(fstr), b"u'fake string'") self.assertIsInstance(str(fstr), str) self.assertIsInstance(bytes(fstr), bytes) @@ -119,18 +119,18 @@ class TestStringMixIn(unittest.TestCase): self.assertTrue(str1) self.assertFalse(str2) - self.assertEquals(11, len(str1)) - self.assertEquals(0, len(str2)) + self.assertEqual(11, len(str1)) + self.assertEqual(0, len(str2)) out = [] for ch in str1: out.append(ch) - self.assertEquals(expected, out) + self.assertEqual(expected, out) out = [] for ch in str2: out.append(ch) - self.assertEquals([], out) + self.assertEqual([], out) gen1 = iter(str1) gen2 = iter(str2) @@ -141,16 +141,16 @@ class TestStringMixIn(unittest.TestCase): for i in range(len(str1)): out.append(gen1.next()) self.assertRaises(StopIteration, gen1.next) - self.assertEquals(expected, out) + self.assertEqual(expected, out) self.assertRaises(StopIteration, gen2.next) - self.assertEquals("gnirts 
ekaf", "".join(list(reversed(str1)))) - self.assertEquals([], list(reversed(str2))) + self.assertEqual("gnirts ekaf", "".join(list(reversed(str1)))) + self.assertEqual([], list(reversed(str2))) - self.assertEquals("f", str1[0]) - self.assertEquals(" ", str1[4]) - self.assertEquals("g", str1[10]) - self.assertEquals("n", str1[-2]) + self.assertEqual("f", str1[0]) + self.assertEqual(" ", str1[4]) + self.assertEqual("g", str1[10]) + self.assertEqual("n", str1[-2]) self.assertRaises(IndexError, lambda: str1[11]) self.assertRaises(IndexError, lambda: str2[0]) @@ -165,75 +165,75 @@ class TestStringMixIn(unittest.TestCase): def test_other_methods(self): """test the remaining non-magic methods of StringMixIn""" str1 = _FakeString("fake string") - self.assertEquals("Fake string", str1.capitalize()) + self.assertEqual("Fake string", str1.capitalize()) - self.assertEquals(" fake string ", str1.center(15)) - self.assertEquals(" fake string ", str1.center(16)) - self.assertEquals("qqfake stringqq", str1.center(15, "q")) + self.assertEqual(" fake string ", str1.center(15)) + self.assertEqual(" fake string ", str1.center(16)) + self.assertEqual("qqfake stringqq", str1.center(15, "q")) - self.assertEquals(1, str1.count("e")) - self.assertEquals(0, str1.count("z")) - self.assertEquals(1, str1.count("r", 7)) - self.assertEquals(0, str1.count("r", 8)) - self.assertEquals(1, str1.count("r", 5, 9)) - self.assertEquals(0, str1.count("r", 5, 7)) + self.assertEqual(1, str1.count("e")) + self.assertEqual(0, str1.count("z")) + self.assertEqual(1, str1.count("r", 7)) + self.assertEqual(0, str1.count("r", 8)) + self.assertEqual(1, str1.count("r", 5, 9)) + self.assertEqual(0, str1.count("r", 5, 7)) if not py3k: str2 = _FakeString("fo") - self.assertEquals(str1, str1.decode()) + self.assertEqual(str1, str1.decode()) actual = _FakeString("\\U00010332\\U0001033f\\U00010344") - self.assertEquals("𐌲𐌿𐍄", actual.decode("unicode_escape")) + self.assertEqual("𐌲𐌿𐍄", actual.decode("unicode_escape")) self.assertRaises(UnicodeError, str2.decode, "punycode") - self.assertEquals("", str2.decode("punycode", "ignore")) + self.assertEqual("", str2.decode("punycode", "ignore")) str3 = _FakeString("𐌲𐌿𐍄") - self.assertEquals(b"fake string", str1.encode()) - self.assertEquals(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", + self.assertEqual(b"fake string", str1.encode()) + self.assertEqual(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", str3.encode("utf8")) - self.assertEquals(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", + self.assertEqual(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", str3.encode(encoding="utf8")) self.assertRaises(UnicodeEncodeError, str3.encode) self.assertRaises(UnicodeEncodeError, str3.encode, "ascii") self.assertRaises(UnicodeEncodeError, str3.encode, "ascii", "strict") self.assertRaises(UnicodeEncodeError, str3.encode, errors="strict") - self.assertEquals("", str3.encode("ascii", "ignore")) - self.assertEquals("", str3.encode(errors="ignore")) + self.assertEqual("", str3.encode("ascii", "ignore")) + self.assertEqual("", str3.encode(errors="ignore")) self.assertTrue(str1.endswith("ing")) self.assertFalse(str1.endswith("ingh")) str4 = _FakeString("\tfoobar") - self.assertEquals("fake string", str1) - self.assertEquals(" foobar", str4.expandtabs()) - self.assertEquals(" foobar", str4.expandtabs(4)) + self.assertEqual("fake string", str1) + self.assertEqual(" foobar", str4.expandtabs()) + self.assertEqual(" foobar", str4.expandtabs(4)) - self.assertEquals(3, str1.find("e")) - 
self.assertEquals(-1, str1.find("z")) - self.assertEquals(7, str1.find("r", 7)) - self.assertEquals(-1, str1.find("r", 8)) - self.assertEquals(7, str1.find("r", 5, 9)) - self.assertEquals(-1, str1.find("r", 5, 7)) + self.assertEqual(3, str1.find("e")) + self.assertEqual(-1, str1.find("z")) + self.assertEqual(7, str1.find("r", 7)) + self.assertEqual(-1, str1.find("r", 8)) + self.assertEqual(7, str1.find("r", 5, 9)) + self.assertEqual(-1, str1.find("r", 5, 7)) str5 = _FakeString("foo{0}baz") str6 = _FakeString("foo{abc}baz") str7 = _FakeString("foo{0}{abc}buzz") str8 = _FakeString("{0}{1}") - self.assertEquals("fake string", str1.format()) - self.assertEquals("foobarbaz", str5.format("bar")) - self.assertEquals("foobarbaz", str6.format(abc="bar")) - self.assertEquals("foobarbazbuzz", str7.format("bar", abc="baz")) + self.assertEqual("fake string", str1.format()) + self.assertEqual("foobarbaz", str5.format("bar")) + self.assertEqual("foobarbaz", str6.format(abc="bar")) + self.assertEqual("foobarbazbuzz", str7.format("bar", abc="baz")) self.assertRaises(IndexError, str8.format, "abc") if py3k: - self.assertEquals("fake string", str1.format_map({})) - self.assertEquals("foobarbaz", str6.format_map({"abc": "bar"})) + self.assertEqual("fake string", str1.format_map({})) + self.assertEqual("foobarbaz", str6.format_map({"abc": "bar"})) self.assertRaises(ValueError, str5.format_map, {0: "abc"}) - self.assertEquals(3, str1.index("e")) + self.assertEqual(3, str1.index("e")) self.assertRaises(ValueError, str1.index, "z") - self.assertEquals(7, str1.index("r", 7)) + self.assertEqual(7, str1.index("r", 7)) self.assertRaises(ValueError, str1.index, "r", 8) - self.assertEquals(7, str1.index("r", 5, 9)) + self.assertEqual(7, str1.index("r", 5, 9)) self.assertRaises(ValueError, str1.index, "r", 5, 7) str9 = _FakeString("foobar") @@ -303,120 +303,120 @@ class TestStringMixIn(unittest.TestCase): self.assertFalse(str15.isupper()) self.assertTrue(str21.isupper()) - self.assertEquals("foobar", str15.join(["foo", "bar"])) - self.assertEquals("foo123bar123baz", str12.join(("foo", "bar", "baz"))) + self.assertEqual("foobar", str15.join(["foo", "bar"])) + self.assertEqual("foo123bar123baz", str12.join(("foo", "bar", "baz"))) - self.assertEquals("fake string ", str1.ljust(15)) - self.assertEquals("fake string ", str1.ljust(16)) - self.assertEquals("fake stringqqqq", str1.ljust(15, "q")) + self.assertEqual("fake string ", str1.ljust(15)) + self.assertEqual("fake string ", str1.ljust(16)) + self.assertEqual("fake stringqqqq", str1.ljust(15, "q")) str22 = _FakeString("ß") - self.assertEquals("", str15.lower()) - self.assertEquals("foobar", str16.lower()) - self.assertEquals("ß", str22.lower()) + self.assertEqual("", str15.lower()) + self.assertEqual("foobar", str16.lower()) + self.assertEqual("ß", str22.lower()) if py3k: - self.assertEquals("", str15.casefold()) - self.assertEquals("foobar", str16.casefold()) - self.assertEquals("ss", str22.casefold()) + self.assertEqual("", str15.casefold()) + self.assertEqual("foobar", str16.casefold()) + self.assertEqual("ss", str22.casefold()) str23 = _FakeString(" fake string ") - self.assertEquals("fake string", str1.lstrip()) - self.assertEquals("fake string ", str23.lstrip()) - self.assertEquals("ke string", str1.lstrip("abcdef")) + self.assertEqual("fake string", str1.lstrip()) + self.assertEqual("fake string ", str23.lstrip()) + self.assertEqual("ke string", str1.lstrip("abcdef")) - self.assertEquals(("fa", "ke", " string"), str1.partition("ke")) - self.assertEquals(("fake 
string", "", ""), str1.partition("asdf")) + self.assertEqual(("fa", "ke", " string"), str1.partition("ke")) + self.assertEqual(("fake string", "", ""), str1.partition("asdf")) str24 = _FakeString("boo foo moo") - self.assertEquals("real string", str1.replace("fake", "real")) - self.assertEquals("bu fu moo", str24.replace("oo", "u", 2)) + self.assertEqual("real string", str1.replace("fake", "real")) + self.assertEqual("bu fu moo", str24.replace("oo", "u", 2)) - self.assertEquals(3, str1.rfind("e")) - self.assertEquals(-1, str1.rfind("z")) - self.assertEquals(7, str1.rfind("r", 7)) - self.assertEquals(-1, str1.rfind("r", 8)) - self.assertEquals(7, str1.rfind("r", 5, 9)) - self.assertEquals(-1, str1.rfind("r", 5, 7)) + self.assertEqual(3, str1.rfind("e")) + self.assertEqual(-1, str1.rfind("z")) + self.assertEqual(7, str1.rfind("r", 7)) + self.assertEqual(-1, str1.rfind("r", 8)) + self.assertEqual(7, str1.rfind("r", 5, 9)) + self.assertEqual(-1, str1.rfind("r", 5, 7)) - self.assertEquals(3, str1.rindex("e")) + self.assertEqual(3, str1.rindex("e")) self.assertRaises(ValueError, str1.rindex, "z") - self.assertEquals(7, str1.rindex("r", 7)) + self.assertEqual(7, str1.rindex("r", 7)) self.assertRaises(ValueError, str1.rindex, "r", 8) - self.assertEquals(7, str1.rindex("r", 5, 9)) + self.assertEqual(7, str1.rindex("r", 5, 9)) self.assertRaises(ValueError, str1.rindex, "r", 5, 7) - self.assertEquals(" fake string", str1.rjust(15)) - self.assertEquals(" fake string", str1.rjust(16)) - self.assertEquals("qqqqfake string", str1.rjust(15, "q")) + self.assertEqual(" fake string", str1.rjust(15)) + self.assertEqual(" fake string", str1.rjust(16)) + self.assertEqual("qqqqfake string", str1.rjust(15, "q")) - self.assertEquals(("fa", "ke", " string"), str1.rpartition("ke")) - self.assertEquals(("", "", "fake string"), str1.rpartition("asdf")) + self.assertEqual(("fa", "ke", " string"), str1.rpartition("ke")) + self.assertEqual(("", "", "fake string"), str1.rpartition("asdf")) str25 = _FakeString(" this is a sentence with whitespace ") actual = ["this", "is", "a", "sentence", "with", "whitespace"] - self.assertEquals(actual, str25.rsplit()) - self.assertEquals(actual, str25.rsplit(None)) + self.assertEqual(actual, str25.rsplit()) + self.assertEqual(actual, str25.rsplit(None)) actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with", "", "whitespace", ""] - self.assertEquals(actual, str25.rsplit(" ")) + self.assertEqual(actual, str25.rsplit(" ")) actual = [" this is a", "sentence", "with", "whitespace"] - self.assertEquals(actual, str25.rsplit(None, 3)) + self.assertEqual(actual, str25.rsplit(None, 3)) actual = [" this is a sentence with", "", "whitespace", ""] - self.assertEquals(actual, str25.rsplit(" ", 3)) + self.assertEqual(actual, str25.rsplit(" ", 3)) if py3k: - self.assertEquals(actual, str25.rsplit(maxsplit=3)) + self.assertEqual(actual, str25.rsplit(maxsplit=3)) - self.assertEquals("fake string", str1.rstrip()) - self.assertEquals(" fake string", str23.rstrip()) - self.assertEquals("fake stri", str1.rstrip("ngr")) + self.assertEqual("fake string", str1.rstrip()) + self.assertEqual(" fake string", str23.rstrip()) + self.assertEqual("fake stri", str1.rstrip("ngr")) actual = ["this", "is", "a", "sentence", "with", "whitespace"] - self.assertEquals(actual, str25.split()) - self.assertEquals(actual, str25.split(None)) + self.assertEqual(actual, str25.split()) + self.assertEqual(actual, str25.split(None)) actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with", "", "whitespace", ""] 
- self.assertEquals(actual, str25.split(" ")) + self.assertEqual(actual, str25.split(" ")) actual = ["this", "is", "a", "sentence with whitespace "] - self.assertEquals(actual, str25.split(None, 3)) + self.assertEqual(actual, str25.split(None, 3)) actual = ["", "", "", "this is a sentence with whitespace "] - self.assertEquals(actual, str25.split(" ", 3)) + self.assertEqual(actual, str25.split(" ", 3)) if py3k: - self.assertEquals(actual, str25.split(maxsplit=3)) + self.assertEqual(actual, str25.split(maxsplit=3)) str26 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere") - self.assertEquals(["lines", "of", "text", "are", "presented", "here"], + self.assertEqual(["lines", "of", "text", "are", "presented", "here"], str26.splitlines()) - self.assertEquals(["lines\n", "of\n", "text\r\n", "are\r\n", + self.assertEqual(["lines\n", "of\n", "text\r\n", "are\r\n", "presented\n", "here"], str26.splitlines(True)) self.assertTrue(str1.startswith("fake")) self.assertFalse(str1.startswith("faker")) - self.assertEquals("fake string", str1.strip()) - self.assertEquals("fake string", str23.strip()) - self.assertEquals("ke stri", str1.strip("abcdefngr")) + self.assertEqual("fake string", str1.strip()) + self.assertEqual("fake string", str23.strip()) + self.assertEqual("ke stri", str1.strip("abcdefngr")) - self.assertEquals("fOObAR", str16.swapcase()) + self.assertEqual("fOObAR", str16.swapcase()) - self.assertEquals("Fake String", str1.title()) + self.assertEqual("Fake String", str1.title()) if py3k: table1 = str.maketrans({97: "1", 101: "2", 105: "3", 111: "4", 117: "5"}) table2 = str.maketrans("aeiou", "12345") table3 = str.maketrans("aeiou", "12345", "rts") - self.assertEquals("f1k2 str3ng", str1.translate(table1)) - self.assertEquals("f1k2 str3ng", str1.translate(table2)) - self.assertEquals("f1k2 3ng", str1.translate(table3)) + self.assertEqual("f1k2 str3ng", str1.translate(table1)) + self.assertEqual("f1k2 str3ng", str1.translate(table2)) + self.assertEqual("f1k2 3ng", str1.translate(table3)) else: table = {97: "1", 101: "2", 105: "3", 111: "4", 117: "5"} - self.assertEquals("f1k2 str3ng", str1.translate(table)) + self.assertEqual("f1k2 str3ng", str1.translate(table)) - self.assertEquals("", str15.upper()) - self.assertEquals("FOOBAR", str16.upper()) + self.assertEqual("", str15.upper()) + self.assertEqual("FOOBAR", str16.upper()) - self.assertEquals("123", str12.zfill(3)) - self.assertEquals("000123", str12.zfill(6)) + self.assertEqual("123", str12.zfill(3)) + self.assertEqual("000123", str12.zfill(6)) if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/tests/test_tokens.py b/tests/test_tokens.py index 5a18b8e..1449ad2 100644 --- a/tests/test_tokens.py +++ b/tests/test_tokens.py @@ -42,8 +42,8 @@ class TestTokens(unittest.TestCase): token1 = tokens.Token() token2 = tokens.Token(foo="bar", baz=123) - self.assertEquals("bar", token2.foo) - self.assertEquals(123, token2.baz) + self.assertEqual("bar", token2.foo) + self.assertEqual(123, token2.baz) self.assertRaises(KeyError, lambda: token1.foo) self.assertRaises(KeyError, lambda: token2.bar) @@ -51,8 +51,8 @@ class TestTokens(unittest.TestCase): token2.foo = "ham" del token2.baz - self.assertEquals("eggs", token1.spam) - self.assertEquals("ham", token2.foo) + self.assertEqual("eggs", token1.spam) + self.assertEqual("ham", token2.foo) self.assertRaises(KeyError, lambda: token2.baz) self.assertRaises(KeyError, delattr, token2, "baz") @@ -63,15 +63,15 @@ class TestTokens(unittest.TestCase): token3 = tokens.Text(text="earwig" * 100) 
hundredchars = ("earwig" * 100)[:97] + "..." - self.assertEquals("Token()", repr(token1)) + self.assertEqual("Token()", repr(token1)) if py3k: token2repr = "Token(foo='bar', baz=123)" token3repr = "Text(text='" + hundredchars + "')" else: token2repr = "Token(foo=u'bar', baz=123)" token3repr = "Text(text=u'" + hundredchars + "')" - self.assertEquals(token2repr, repr(token2)) - self.assertEquals(token3repr, repr(token3)) + self.assertEqual(token2repr, repr(token2)) + self.assertEqual(token3repr, repr(token3)) def test_equality(self): """check that equivalent tokens are considered equal""" @@ -82,10 +82,10 @@ class TestTokens(unittest.TestCase): token5 = tokens.Text(text="asdf") token6 = tokens.TemplateOpen(text="asdf") - self.assertEquals(token1, token2) - self.assertEquals(token2, token1) - self.assertEquals(token4, token5) - self.assertEquals(token5, token4) + self.assertEqual(token1, token2) + self.assertEqual(token2, token1) + self.assertEqual(token4, token5) + self.assertEqual(token5, token4) self.assertNotEquals(token1, token3) self.assertNotEquals(token2, token3) self.assertNotEquals(token4, token6) @@ -99,7 +99,7 @@ class TestTokens(unittest.TestCase): tokens.Text(text="earwig") ] for token in tests: - self.assertEquals(token, eval(repr(token), vars(tokens))) + self.assertEqual(token, eval(repr(token), vars(tokens))) if __name__ == "__main__": unittest.main(verbosity=2) From 97a837c1e8d8fbaae71360f442f53ca7bd81a58f Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Mar 2013 01:36:02 -0400 Subject: [PATCH 108/180] Implement test_parser(). Clean up a few lambdas in TestSmartList. --- tests/test_parser.py | 62 +++++++++++++++++++++++++++++++++++++++++++++++- tests/test_smart_list.py | 8 +++---- 2 files changed, 65 insertions(+), 5 deletions(-) diff --git a/tests/test_parser.py b/tests/test_parser.py index 5ea2b49..6e775ce 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -23,8 +23,68 @@ from __future__ import unicode_literals import unittest +from mwparserfromhell.compat import range +from mwparserfromhell.nodes import Template, Text, Wikilink +from mwparserfromhell.nodes.extras import Parameter +from mwparserfromhell.parser import Parser +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.wikicode import Wikicode + class TestParser(unittest.TestCase): - pass + """Tests for the Parser class itself, which tokenizes and builds nodes.""" + + def assertNodesEqual(self, expected, actual): + """Assert that two Nodes are the same type and have the same data.""" + self.assertIs(type(expected), type(actual)) + if isinstance(expected, Text): + self.assertEqual(expected.value, actual.value) + elif isinstance(expected, Template): + self.assertWikicodeEqual(expected.name, actual.name) + length = len(expected.params) + self.assertEqual(length, len(actual.params)) + for i in range(length): + exp_param = expected.params[i] + act_param = actual.params[i] + self.assertWikicodeEqual(exp_param.name, act_param.name) + self.assertWikicodeEqual(exp_param.value, act_param.value) + self.assertIs(exp_param.showkey, act_param.showkey) + elif isinstance(expected, Wikilink): + self.assertWikicodeEqual(expected.title, actual.title) + if expected.text is not None: + self.assertWikicodeEqual(expected.text, actual.text) + else: + self.assertIs(None, actual.text) + + def assertWikicodeEqual(self, expected, actual): + """Assert that two Wikicode objects have the same data.""" + self.assertIsInstance(actual, Wikicode) + length = len(expected.nodes) + self.assertEqual(length, 
len(actual.nodes)) + for i in range(length): + self.assertNodesEqual(expected.get(i), actual.get(i)) + + def test_parser(self): + """integration test for parsing overall""" + text = "this is text; {{this|is=a|template={{with|[[links]]|in}}it}}" + wrap = lambda L: Wikicode(SmartList(L)) + expected = wrap([ + Text("this is text; "), + Template(wrap([Text("this")]), [ + Parameter(wrap([Text("is")]), wrap([Text("a")])), + Parameter(wrap([Text("template")]), wrap([ + Template(wrap([Text("with")]), [ + Parameter(wrap([Text("1")]), + wrap([Wikilink(wrap([Text("links")]))]), + showkey=False), + Parameter(wrap([Text("2")]), + wrap([Text("in")]), showkey=False) + ]), + Text("it") + ])) + ]) + ]) + actual = Parser(text).parse() + self.assertWikicodeEqual(expected, actual) if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 680de9d..d821ccd 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -288,19 +288,19 @@ class TestSmartList(unittest.TestCase): def test_parent_get_set_del(self): """make sure SmartList's getitem/setitem/delitem work""" - self._test_get_set_del_item(lambda L: SmartList(L)) + self._test_get_set_del_item(SmartList) def test_parent_add(self): """make sure SmartList's add/radd/iadd work""" - self._test_add_radd_iadd(lambda L: SmartList(L)) + self._test_add_radd_iadd(SmartList) def test_parent_unaffected_magics(self): """sanity checks against SmartList features that were not modified""" - self._test_other_magic_methods(lambda L: SmartList(L)) + self._test_other_magic_methods(SmartList) def test_parent_methods(self): """make sure SmartList's non-magic methods work, like append()""" - self._test_list_methods(lambda L: SmartList(L)) + self._test_list_methods(SmartList) def test_child_get_set_del(self): """make sure _ListProxy's getitem/setitem/delitem work""" From f8032695146f032108c1b736631f546712689372 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Mar 2013 17:19:08 -0400 Subject: [PATCH 109/180] Add a USES_C field to the tokenizers; add TestParser.test_use_c() --- mwparserfromhell/parser/tokenizer.c | 2 ++ mwparserfromhell/parser/tokenizer.py | 1 + tests/test_parser.py | 13 ++++++++++--- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 8c96500..d3abb22 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -1387,6 +1387,8 @@ init_tokenizer(void) module = Py_InitModule("_tokenizer", module_methods); Py_INCREF(&TokenizerType); PyModule_AddObject(module, "CTokenizer", (PyObject*) &TokenizerType); + Py_INCREF(Py_True); + PyDict_SetItemString(TokenizerType.tp_dict, "USES_C", Py_True); tempmod = PyImport_ImportModule("htmlentitydefs"); if (!tempmod) diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 67638ca..0bf0322 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -38,6 +38,7 @@ class BadRoute(Exception): class Tokenizer(object): """Creates a list of tokens from a string of wikicode.""" + USES_C = False START = object() END = object() MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":", diff --git a/tests/test_parser.py b/tests/test_parser.py index 6e775ce..4f718c8 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -23,10 +23,10 @@ from __future__ import unicode_literals import unittest +from mwparserfromhell import parser from 
mwparserfromhell.compat import range from mwparserfromhell.nodes import Template, Text, Wikilink from mwparserfromhell.nodes.extras import Parameter -from mwparserfromhell.parser import Parser from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode @@ -63,7 +63,14 @@ class TestParser(unittest.TestCase): for i in range(length): self.assertNodesEqual(expected.get(i), actual.get(i)) - def test_parser(self): + def test_use_c(self): + """make sure the correct tokenizer is used""" + if parser.use_c: + self.assertTrue(parser.Parser(None)._tokenizer.USES_C) + parser.use_c = False + self.assertFalse(parser.Parser(None)._tokenizer.USES_C) + + def test_parsing(self): """integration test for parsing overall""" text = "this is text; {{this|is=a|template={{with|[[links]]|in}}it}}" wrap = lambda L: Wikicode(SmartList(L)) @@ -83,7 +90,7 @@ class TestParser(unittest.TestCase): ])) ]) ]) - actual = Parser(text).parse() + actual = parser.Parser(text).parse() self.assertWikicodeEqual(expected, actual) if __name__ == "__main__": From 27a3503aa113c12971fab6a1d8fd676180b70449 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Mar 2013 17:22:37 -0400 Subject: [PATCH 110/180] Add test_uses_c() to TestPyTokenizer and TestCTokenizer --- tests/test_ctokenizer.py | 8 +++++++- tests/test_pytokenizer.py | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index 4dbeceb..7ef8975 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -23,6 +23,8 @@ from __future__ import unicode_literals import unittest +from mwparserfromhell.parser._tokenizer import CTokenizer + from _test_tokenizer import TokenizerTestCase class TestCTokenizer(TokenizerTestCase, unittest.TestCase): @@ -30,8 +32,12 @@ class TestCTokenizer(TokenizerTestCase, unittest.TestCase): @classmethod def setUpClass(cls): - from mwparserfromhell.parser._tokenizer import CTokenizer cls.tokenizer = CTokenizer + def test_uses_c(self): + """make sure the C tokenizer identifies as using a C extension""" + self.assertTrue(CTokenizer.USES_C) + self.assertTrue(CTokenizer().USES_C) + if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index 73e6fe7..3e598bf 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -23,6 +23,8 @@ from __future__ import unicode_literals import unittest +from mwparserfromhell.parser.tokenizer import Tokenizer + from _test_tokenizer import TokenizerTestCase class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): @@ -30,8 +32,12 @@ class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): @classmethod def setUpClass(cls): - from mwparserfromhell.parser.tokenizer import Tokenizer cls.tokenizer = Tokenizer + def test_uses_c(self): + """make sure the Python tokenizer identifies as not using C""" + self.assertFalse(Tokenizer.USES_C) + self.assertFalse(Tokenizer().USES_C) + if __name__ == "__main__": unittest.main(verbosity=2) From 5ca6f6c755bb8b3d3a3190bab4cf6f0a1eb6b2a7 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Mar 2013 17:40:39 -0400 Subject: [PATCH 111/180] Skip test_readme_5() if web query fails. 
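Network access from the test suite is inherently flaky, so a failed HTTP
fetch should make the test report as skipped rather than failed. The fix
wraps the web call in try/except and calls unittest's skipTest(). A minimal
sketch of the pattern, separate from the actual diff below (the class name
and URL here are made up, and it assumes Python 2's urllib as used by the
code being patched):

    import unittest
    import urllib

    class ExampleWebTest(unittest.TestCase):
        def test_fetch(self):
            try:
                raw = urllib.urlopen("http://example.com/").read()
            except IOError:
                self.skipTest("cannot continue because of unsuccessful web call")
            self.assertTrue(raw)  # real assertions would go here

skipTest() raises unittest.SkipTest internally, so execution stops at that
point and the runner counts the test as skipped instead of errored.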
--- tests/test_docs.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_docs.py b/tests/test_docs.py index 075b0a7..971c5d1 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -113,7 +113,10 @@ class TestDocs(unittest.TestCase): title = "Test" data = {"action": "query", "prop": "revisions", "rvlimit": 1, "rvprop": "content", "format": "json", "titles": title} - raw = urllib.urlopen(url1, urllib.urlencode(data)).read() + try: + raw = urllib.urlopen(url1, urllib.urlencode(data)).read() + except IOError: + self.skipTest("cannot continue because of unsuccessful web call") res = json.loads(raw) text = res["query"]["pages"].values()[0]["revisions"][0]["*"] expected = urllib.urlopen(url2.format(title)).read().decode("utf8") From 7f87a1c4b371f813d5006b25cf39f2b40b4dc58e Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Mar 2013 19:39:12 -0400 Subject: [PATCH 112/180] Apply bugfixes so that some tests pass on Python 3. - Skip CTokenizer tests if CTokenizer is not available. - TestStringMixin: Don't make assumptions about default encoding. - Add urllib stuff to mwparserfromhell.compat. - Fix compat.py's line endings. - gen.next() -> next(gen) - assert*Equals() -> assert*Equal() --- mwparserfromhell/compat.py | 69 +++++++++++++++++++++------------------- mwparserfromhell/string_mixin.py | 2 +- tests/test_ctokenizer.py | 6 +++- tests/test_docs.py | 14 ++++---- tests/test_smart_list.py | 6 ++-- tests/test_string_mixin.py | 33 ++++++++++++------- tests/test_tokens.py | 17 ++++++---- 7 files changed, 85 insertions(+), 62 deletions(-) diff --git a/mwparserfromhell/compat.py b/mwparserfromhell/compat.py index 48b9807..34870e6 100755 --- a/mwparserfromhell/compat.py +++ b/mwparserfromhell/compat.py @@ -1,33 +1,36 @@ -# -*- coding: utf-8 -*- - -""" -Implements support for both Python 2 and Python 3 by defining common types in -terms of their Python 2/3 variants. For example, :py:class:`str` is set to -:py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise, -:py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These -types are meant to be imported directly from within the parser's modules. -""" - -import sys - -py3k = sys.version_info[0] == 3 - -if py3k: - bytes = bytes - str = str - basestring = str - range = range - maxsize = sys.maxsize - import html.entities as htmlentities - from io import StringIO - -else: - bytes = str - str = unicode - basestring = basestring - range = xrange - maxsize = sys.maxint - import htmlentitydefs as htmlentities - from StringIO import StringIO - -del sys +# -*- coding: utf-8 -*- + +""" +Implements support for both Python 2 and Python 3 by defining common types in +terms of their Python 2/3 variants. For example, :py:class:`str` is set to +:py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise, +:py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These +types are meant to be imported directly from within the parser's modules. 
+""" + +import sys + +py3k = sys.version_info[0] == 3 + +if py3k: + bytes = bytes + str = str + basestring = str + range = range + maxsize = sys.maxsize + import html.entities as htmlentities + from io import StringIO + from urllib.parse import urlencode + from urllib.request import urlopen + +else: + bytes = str + str = unicode + basestring = basestring + range = xrange + maxsize = sys.maxint + import htmlentitydefs as htmlentities + from StringIO import StringIO + from urllib import urlencode, urlopen + +del sys diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index eee58b9..6bee9c4 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -252,8 +252,8 @@ class StringMixIn(object): return self.__unicode__().lstrip(chars) if py3k: - @inheritdoc @staticmethod + @inheritdoc def maketrans(self, x, y=None, z=None): if z is None: if y is None: diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index 7ef8975..f21378c 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -23,10 +23,14 @@ from __future__ import unicode_literals import unittest -from mwparserfromhell.parser._tokenizer import CTokenizer +try: + from mwparserfromhell.parser._tokenizer import CTokenizer +except ImportError: + CTokenizer = None from _test_tokenizer import TokenizerTestCase +@unittest.skipUnless(CTokenizer, "C tokenizer not available") class TestCTokenizer(TokenizerTestCase, unittest.TestCase): """Test cases for the C tokenizer.""" diff --git a/tests/test_docs.py b/tests/test_docs.py index 971c5d1..3b23bb7 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -23,10 +23,9 @@ from __future__ import print_function, unicode_literals import json import unittest -import urllib import mwparserfromhell -from mwparserfromhell.compat import py3k, str, StringIO +from mwparserfromhell.compat import py3k, str, StringIO, urlencode, urlopen class TestDocs(unittest.TestCase): """Integration test cases for mwparserfromhell's documentation.""" @@ -114,12 +113,15 @@ class TestDocs(unittest.TestCase): data = {"action": "query", "prop": "revisions", "rvlimit": 1, "rvprop": "content", "format": "json", "titles": title} try: - raw = urllib.urlopen(url1, urllib.urlencode(data)).read() + raw = urlopen(url1, urlencode(data).encode("utf8")).read() + except IOError: + self.skipTest("cannot continue because of unsuccessful web call") + res = json.loads(raw.decode("utf8")) + text = list(res["query"]["pages"].values())[0]["revisions"][0]["*"] + try: + expected = urlopen(url2.format(title)).read().decode("utf8") except IOError: self.skipTest("cannot continue because of unsuccessful web call") - res = json.loads(raw) - text = res["query"]["pages"].values()[0]["revisions"][0]["*"] - expected = urllib.urlopen(url2.format(title)).read().decode("utf8") actual = mwparserfromhell.parse(text) self.assertEqual(expected, actual) diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index d821ccd..01caca7 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -180,11 +180,11 @@ class TestSmartList(unittest.TestCase): gen1 = iter(list1) out = [] for i in range(len(list1)): - out.append(gen1.next()) - self.assertRaises(StopIteration, gen1.next) + out.append(next(gen1)) + self.assertRaises(StopIteration, next, gen1) self.assertEqual([0, 1, 2, 3, "one", "two"], out) gen2 = iter(list2) - self.assertRaises(StopIteration, gen2.next) + self.assertRaises(StopIteration, next, gen2) self.assertEqual(["two", "one", 3, 2, 1, 0], 
list(reversed(list1))) self.assertEqual([], list(reversed(list2))) diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 6ef6344..6d10609 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -21,6 +21,7 @@ # SOFTWARE. from __future__ import unicode_literals +from sys import getdefaultencoding from types import GeneratorType import unittest @@ -139,10 +140,10 @@ class TestStringMixIn(unittest.TestCase): out = [] for i in range(len(str1)): - out.append(gen1.next()) - self.assertRaises(StopIteration, gen1.next) + out.append(next(gen1)) + self.assertRaises(StopIteration, next, gen1) self.assertEqual(expected, out) - self.assertRaises(StopIteration, gen2.next) + self.assertRaises(StopIteration, next, gen2) self.assertEqual("gnirts ekaf", "".join(list(reversed(str1)))) self.assertEqual([], list(reversed(str2))) @@ -187,17 +188,25 @@ class TestStringMixIn(unittest.TestCase): self.assertEqual("", str2.decode("punycode", "ignore")) str3 = _FakeString("𐌲𐌿𐍄") + actual = b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84" self.assertEqual(b"fake string", str1.encode()) - self.assertEqual(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", - str3.encode("utf8")) - self.assertEqual(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", - str3.encode(encoding="utf8")) - self.assertRaises(UnicodeEncodeError, str3.encode) + self.assertEqual(actual, str3.encode("utf-8")) + self.assertEqual(actual, str3.encode(encoding="utf-8")) + if getdefaultencoding() == "ascii": + self.assertRaises(UnicodeEncodeError, str3.encode) + elif getdefaultencoding() == "utf-8": + self.assertEqual(actual, str3.encode()) self.assertRaises(UnicodeEncodeError, str3.encode, "ascii") self.assertRaises(UnicodeEncodeError, str3.encode, "ascii", "strict") - self.assertRaises(UnicodeEncodeError, str3.encode, errors="strict") - self.assertEqual("", str3.encode("ascii", "ignore")) - self.assertEqual("", str3.encode(errors="ignore")) + if getdefaultencoding() == "ascii": + self.assertRaises(UnicodeEncodeError, str3.encode, errors="strict") + elif getdefaultencoding() == "utf-8": + self.assertEqual(actual, str3.encode(errors="strict")) + self.assertEqual(b"", str3.encode("ascii", "ignore")) + if getdefaultencoding() == "ascii": + self.assertEqual(b"", str3.encode(errors="ignore")) + elif getdefaultencoding() == "utf-8": + self.assertEqual(actual, str3.encode(errors="ignore")) self.assertTrue(str1.endswith("ing")) self.assertFalse(str1.endswith("ingh")) @@ -364,6 +373,7 @@ class TestStringMixIn(unittest.TestCase): actual = [" this is a sentence with", "", "whitespace", ""] self.assertEqual(actual, str25.rsplit(" ", 3)) if py3k: + actual = [" this is a", "sentence", "with", "whitespace"] self.assertEqual(actual, str25.rsplit(maxsplit=3)) self.assertEqual("fake string", str1.rstrip()) @@ -381,6 +391,7 @@ class TestStringMixIn(unittest.TestCase): actual = ["", "", "", "this is a sentence with whitespace "] self.assertEqual(actual, str25.split(" ", 3)) if py3k: + actual = ["this", "is", "a", "sentence with whitespace "] self.assertEqual(actual, str25.split(maxsplit=3)) str26 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere") diff --git a/tests/test_tokens.py b/tests/test_tokens.py index 1449ad2..4620982 100644 --- a/tests/test_tokens.py +++ b/tests/test_tokens.py @@ -65,12 +65,15 @@ class TestTokens(unittest.TestCase): self.assertEqual("Token()", repr(token1)) if py3k: - token2repr = "Token(foo='bar', baz=123)" + token2repr1 = "Token(foo='bar', baz=123)" + token2repr2 = "Token(baz=123, foo='bar')" 
token3repr = "Text(text='" + hundredchars + "')" else: - token2repr = "Token(foo=u'bar', baz=123)" + token2repr1 = "Token(foo=u'bar', baz=123)" + token2repr2 = "Token(baz=123, foo=u'bar')" token3repr = "Text(text=u'" + hundredchars + "')" - self.assertEqual(token2repr, repr(token2)) + token2repr = repr(token2) + self.assertTrue(token2repr == token2repr1 or token2repr == token2repr2) self.assertEqual(token3repr, repr(token3)) def test_equality(self): @@ -86,10 +89,10 @@ class TestTokens(unittest.TestCase): self.assertEqual(token2, token1) self.assertEqual(token4, token5) self.assertEqual(token5, token4) - self.assertNotEquals(token1, token3) - self.assertNotEquals(token2, token3) - self.assertNotEquals(token4, token6) - self.assertNotEquals(token5, token6) + self.assertNotEqual(token1, token3) + self.assertNotEqual(token2, token3) + self.assertNotEqual(token4, token6) + self.assertNotEqual(token5, token6) def test_repr_equality(self): "check that eval(repr(token)) == token" From 32ac6958e1618e9025486212dac412346126bccd Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Mar 2013 20:59:23 -0400 Subject: [PATCH 113/180] Apply some bugfixes to SmartList to fix tests on Python 3. - Add a _SliceNormalizerMixIn to properly handle slices. - Use floor division when applying key.step. - Implement sort() without 'cmp' parameter. - Fix bytes(list) behavior. - Children of _ListProxies are now _ListProxies, not regular lists. --- mwparserfromhell/smart_list.py | 137 +++++++++++++++++++++++++++-------------- tests/test_smart_list.py | 12 ++-- 2 files changed, 99 insertions(+), 50 deletions(-) diff --git a/mwparserfromhell/smart_list.py b/mwparserfromhell/smart_list.py index 46c475a..09b7bbb 100644 --- a/mwparserfromhell/smart_list.py +++ b/mwparserfromhell/smart_list.py @@ -41,8 +41,23 @@ def inheritdoc(method): method.__doc__ = getattr(list, method.__name__).__doc__ return method +class _SliceNormalizerMixIn(object): + """MixIn that provides a private method to normalize slices.""" -class SmartList(list): + def _normalize_slice(self, key): + """Return a slice equivalent to the input *key*, standardized.""" + if key.start is not None: + start = (len(self) + key.start) if key.start < 0 else key.start + else: + start = 0 + if key.stop is not None: + stop = (len(self) + key.stop) if key.stop < 0 else key.stop + else: + stop = maxsize + return slice(start, stop, key.step or 1) + + +class SmartList(_SliceNormalizerMixIn, list): """Implements the ``list`` interface with special handling of sublists. 
When a sublist is created (by ``list[i:j]``), any changes made to this @@ -76,8 +91,8 @@ class SmartList(list): def __getitem__(self, key): if not isinstance(key, slice): return super(SmartList, self).__getitem__(key) - keystop = maxsize if key.stop is None else key.stop - sliceinfo = [key.start or 0, keystop, key.step or 1] + key = self._normalize_slice(key) + sliceinfo = [key.start, key.stop, key.step] child = _ListProxy(self, sliceinfo) self._children[id(child)] = (child, sliceinfo) return child @@ -87,9 +102,8 @@ class SmartList(list): return super(SmartList, self).__setitem__(key, item) item = list(item) super(SmartList, self).__setitem__(key, item) - keystop = maxsize if key.stop is None else key.stop - key = slice(key.start or 0, keystop, key.step or 1) - diff = len(item) + (key.start - key.stop) / key.step + key = self._normalize_slice(key) + diff = len(item) + (key.start - key.stop) // key.step values = self._children.values if py3k else self._children.itervalues if diff: for child, (start, stop, step) in values(): @@ -101,11 +115,10 @@ class SmartList(list): def __delitem__(self, key): super(SmartList, self).__delitem__(key) if isinstance(key, slice): - keystop = maxsize if key.stop is None else key.stop - key = slice(key.start or 0, keystop, key.step or 1) + key = self._normalize_slice(key) else: key = slice(key, key + 1, 1) - diff = (key.stop - key.start) / key.step + diff = (key.stop - key.start) // key.step values = self._children.values if py3k else self._children.itervalues for child, (start, stop, step) in values(): if start > key.start: @@ -166,22 +179,35 @@ class SmartList(list): child._parent = copy super(SmartList, self).reverse() - @inheritdoc - def sort(self, cmp=None, key=None, reverse=None): - copy = list(self) - for child in self._children: - child._parent = copy - kwargs = {} - if cmp is not None: - kwargs["cmp"] = cmp - if key is not None: - kwargs["key"] = key - if reverse is not None: - kwargs["reverse"] = reverse - super(SmartList, self).sort(**kwargs) - - -class _ListProxy(list): + if py3k: + @inheritdoc + def sort(self, key=None, reverse=None): + copy = list(self) + for child in self._children: + child._parent = copy + kwargs = {} + if key is not None: + kwargs["key"] = key + if reverse is not None: + kwargs["reverse"] = reverse + super(SmartList, self).sort(**kwargs) + else: + @inheritdoc + def sort(self, cmp=None, key=None, reverse=None): + copy = list(self) + for child in self._children: + child._parent = copy + kwargs = {} + if cmp is not None: + kwargs["cmp"] = cmp + if key is not None: + kwargs["key"] = key + if reverse is not None: + kwargs["reverse"] = reverse + super(SmartList, self).sort(**kwargs) + + +class _ListProxy(_SliceNormalizerMixIn, list): """Implement the ``list`` interface by getting elements from a parent. This is created by a :py:class:`~.SmartList` object when slicing. 
It does @@ -235,19 +261,28 @@ class _ListProxy(list): return bool(self._render()) def __len__(self): - return (self._stop - self._start) / self._step + return (self._stop - self._start) // self._step def __getitem__(self, key): - return self._render()[key] + if isinstance(key, slice): + key = self._normalize_slice(key) + if key.stop == maxsize: + keystop = self._stop + else: + keystop = key.stop + self._start + adjusted = slice(key.start + self._start, keystop, key.step) + return self._parent[adjusted] + else: + return self._render()[key] def __setitem__(self, key, item): if isinstance(key, slice): - keystart = (key.start or 0) + self._start - if key.stop is None or key.stop == maxsize: + key = self._normalize_slice(key) + if key.stop == maxsize: keystop = self._stop else: keystop = key.stop + self._start - adjusted = slice(keystart, keystop, key.step) + adjusted = slice(key.start + self._start, keystop, key.step) self._parent[adjusted] = item else: length = len(self) @@ -259,12 +294,12 @@ class _ListProxy(list): def __delitem__(self, key): if isinstance(key, slice): - keystart = (key.start or 0) + self._start - if key.stop is None or key.stop == maxsize: + key = self._normalize_slice(key) + if key.stop == maxsize: keystop = self._stop else: keystop = key.stop + self._start - adjusted = slice(keystart, keystop, key.step) + adjusted = slice(key.start + self._start, keystop, key.step) del self._parent[adjusted] else: length = len(self) @@ -388,18 +423,30 @@ class _ListProxy(list): item.reverse() self._parent[self._start:self._stop:self._step] = item - @inheritdoc - def sort(self, cmp=None, key=None, reverse=None): - item = self._render() - kwargs = {} - if cmp is not None: - kwargs["cmp"] = cmp - if key is not None: - kwargs["key"] = key - if reverse is not None: - kwargs["reverse"] = reverse - item.sort(**kwargs) - self._parent[self._start:self._stop:self._step] = item + if py3k: + @inheritdoc + def sort(self, key=None, reverse=None): + item = self._render() + kwargs = {} + if key is not None: + kwargs["key"] = key + if reverse is not None: + kwargs["reverse"] = reverse + item.sort(**kwargs) + self._parent[self._start:self._stop:self._step] = item + else: + @inheritdoc + def sort(self, cmp=None, key=None, reverse=None): + item = self._render() + kwargs = {} + if cmp is not None: + kwargs["cmp"] = cmp + if key is not None: + kwargs["key"] = key + if reverse is not None: + kwargs["reverse"] = reverse + item.sort(**kwargs) + self._parent[self._start:self._stop:self._step] = item del inheritdoc diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 01caca7..3423bb7 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -123,7 +123,7 @@ class TestSmartList(unittest.TestCase): if py3k: self.assertEqual("[0, 1, 2, 3, 'one', 'two']", str(list1)) - self.assertEqual(b"[0, 1, 2, 3, 'one', 'two']", bytes(list1)) + self.assertEqual(b"\x00\x01\x02", bytes(list4)) self.assertEqual("[0, 1, 2, 3, 'one', 'two']", repr(list1)) else: self.assertEqual("[0, 1, 2, 3, u'one', u'two']", unicode(list1)) @@ -256,10 +256,12 @@ class TestSmartList(unittest.TestCase): self.assertEqual([0, 2, 2, 3, 4, 5], list1) list1.sort(reverse=True) self.assertEqual([5, 4, 3, 2, 2, 0], list1) - list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - y)) # Distance from 3 - self.assertEqual([3, 4, 2, 2, 5, 0], list1) - list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - y), reverse=True) - self.assertEqual([0, 5, 4, 2, 2, 3], list1) + if not py3k: + func = lambda x, y: abs(3 - x) - abs(3 - y) # Distance from 3 + 
list1.sort(cmp=func) + self.assertEqual([3, 4, 2, 2, 5, 0], list1) + list1.sort(cmp=func, reverse=True) + self.assertEqual([0, 5, 4, 2, 2, 3], list1) list3.sort(key=lambda i: i[1]) self.assertEqual([("d", 2), ("c", 3), ("a", 5), ("b", 8)], list3) list3.sort(key=lambda i: i[1], reverse=True) From eae6f11add071401c95e89c5f8ea42be2d0c96aa Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Mar 2013 21:24:45 -0400 Subject: [PATCH 114/180] Make _test_tokenizer import relative so tests work on py3k. --- tests/test_ctokenizer.py | 2 +- tests/test_pytokenizer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index f21378c..7a082e8 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -28,7 +28,7 @@ try: except ImportError: CTokenizer = None -from _test_tokenizer import TokenizerTestCase +from ._test_tokenizer import TokenizerTestCase @unittest.skipUnless(CTokenizer, "C tokenizer not available") class TestCTokenizer(TokenizerTestCase, unittest.TestCase): diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index 3e598bf..697c7e5 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -25,7 +25,7 @@ import unittest from mwparserfromhell.parser.tokenizer import Tokenizer -from _test_tokenizer import TokenizerTestCase +from ._test_tokenizer import TokenizerTestCase class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): """Test cases for the Python tokenizer.""" From 1b69b5e882944abf0909816d2daed76c37cbe9c8 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 30 Mar 2013 16:46:39 -0400 Subject: [PATCH 115/180] Moving compat stuff exclusively for unit tests to its own file. --- mwparserfromhell/compat.py | 7 ------- tests/compat.py | 20 ++++++++++++++++++++ tests/test_docs.py | 4 +++- tests/test_parser.py | 3 ++- tests/test_smart_list.py | 4 +++- tests/test_string_mixin.py | 4 +++- 6 files changed, 31 insertions(+), 11 deletions(-) create mode 100644 tests/compat.py diff --git a/mwparserfromhell/compat.py b/mwparserfromhell/compat.py index 34870e6..bb81513 100755 --- a/mwparserfromhell/compat.py +++ b/mwparserfromhell/compat.py @@ -16,21 +16,14 @@ if py3k: bytes = bytes str = str basestring = str - range = range maxsize = sys.maxsize import html.entities as htmlentities - from io import StringIO - from urllib.parse import urlencode - from urllib.request import urlopen else: bytes = str str = unicode basestring = basestring - range = xrange maxsize = sys.maxint import htmlentitydefs as htmlentities - from StringIO import StringIO - from urllib import urlencode, urlopen del sys diff --git a/tests/compat.py b/tests/compat.py new file mode 100644 index 0000000..8bed40e --- /dev/null +++ b/tests/compat.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- + +""" +Serves the same purpose as mwparserfromhell.compat, but only for objects +required by unit tests. This avoids unnecessary imports (like urllib) within +the main library. 
+""" + +from mwparserfromhell.compat import py3k + +if py3k: + range = range + from io import StringIO + from urllib.parse import urlencode + from urllib.request import urlopen + +else: + range = xrange + from StringIO import StringIO + from urllib import urlencode, urlopen diff --git a/tests/test_docs.py b/tests/test_docs.py index 3b23bb7..8d95c47 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -25,7 +25,9 @@ import json import unittest import mwparserfromhell -from mwparserfromhell.compat import py3k, str, StringIO, urlencode, urlopen +from mwparserfromhell.compat import py3k, str + +from .compat import StringIO, urlencode, urlopen class TestDocs(unittest.TestCase): """Integration test cases for mwparserfromhell's documentation.""" diff --git a/tests/test_parser.py b/tests/test_parser.py index 4f718c8..1c37a85 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -24,12 +24,13 @@ from __future__ import unicode_literals import unittest from mwparserfromhell import parser -from mwparserfromhell.compat import range from mwparserfromhell.nodes import Template, Text, Wikilink from mwparserfromhell.nodes.extras import Parameter from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode +from .compat import range + class TestParser(unittest.TestCase): """Tests for the Parser class itself, which tokenizes and builds nodes.""" diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 3423bb7..25df555 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -23,9 +23,11 @@ from __future__ import unicode_literals import unittest -from mwparserfromhell.compat import py3k, range +from mwparserfromhell.compat import py3k from mwparserfromhell.smart_list import SmartList, _ListProxy +from .compat import range + class TestSmartList(unittest.TestCase): """Test cases for the SmartList class and its child, _ListProxy.""" diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 6d10609..306f2fd 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -25,9 +25,11 @@ from sys import getdefaultencoding from types import GeneratorType import unittest -from mwparserfromhell.compat import bytes, py3k, range, str +from mwparserfromhell.compat import bytes, py3k, str from mwparserfromhell.string_mixin import StringMixIn +from .compat import range + class _FakeString(StringMixIn): def __init__(self, data): self._data = data From e3f89af62dcc323b6119174a07868057e814ede9 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 30 Mar 2013 18:38:29 -0400 Subject: [PATCH 116/180] Adding a TreeEqualityTestCase base class. --- tests/_test_tokenizer.py | 3 +- tests/_test_tree_equality.py | 78 ++++++++++++++++++++++++++++++++++++++++++++ tests/test_ctokenizer.py | 2 +- tests/test_parser.py | 33 ++----------------- tests/test_pytokenizer.py | 2 +- 5 files changed, 84 insertions(+), 34 deletions(-) create mode 100644 tests/_test_tree_equality.py diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 379b4fa..13882aa 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -21,6 +21,7 @@ # SOFTWARE. from __future__ import print_function, unicode_literals +from unittest import TestCase from os import listdir, path from mwparserfromhell.compat import py3k @@ -31,7 +32,7 @@ class _TestParseError(Exception): pass -class TokenizerTestCase(object): +class TokenizerTestCase(TestCase): """A base test case for tokenizers, whose tests are loaded dynamically. 
Subclassed along with unittest.TestCase to form TestPyTokenizer and diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py new file mode 100644 index 0000000..26c373d --- /dev/null +++ b/tests/_test_tree_equality.py @@ -0,0 +1,78 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import unicode_literals +from unittest import TestCase + +from mwparserfromhell.nodes import Template, Text, Wikilink +from mwparserfromhell.nodes.extras import Parameter +from mwparserfromhell.wikicode import Wikicode + +class TreeEqualityTestCase(TestCase): + """A base test case with support for comparing the equality of node trees. + + This adds a number of type equality functions, for Wikicode, Text, + Templates, and Wikilinks. 
+ """ + + def assertNodeEqual(self, expected, actual): + registry = { + Text: self.assertTextNodeEqual, + Template: self.assertTemplateNodeEqual, + Wikilink: self.assertWikilinkNodeEqual + } + for nodetype in registry: + if isinstance(expected, nodetype): + self.assertIsInstance(actual, nodetype) + registry[nodetype](expected, actual) + + def assertTextNodeEqual(self, expected, actual): + """Assert that two Text nodes have the same data.""" + self.assertEqual(expected.value, actual.value) + + def assertTemplateNodeEqual(self, expected, actual): + """Assert that two Template nodes have the same data.""" + self.assertWikicodeEqual(expected.name, actual.name) + length = len(expected.params) + self.assertEqual(length, len(actual.params)) + for i in range(length): + exp_param = expected.params[i] + act_param = actual.params[i] + self.assertWikicodeEqual(exp_param.name, act_param.name) + self.assertWikicodeEqual(exp_param.value, act_param.value) + self.assertIs(exp_param.showkey, act_param.showkey) + + def assertWikilinkNodeEqual(self, expected, actual): + """Assert that two Wikilink nodes have the same data.""" + self.assertWikicodeEqual(expected.title, actual.title) + if expected.text is not None: + self.assertWikicodeEqual(expected.text, actual.text) + else: + self.assertIs(None, actual.text) + + def assertWikicodeEqual(self, expected, actual): + """Assert that two Wikicode objects have the same data.""" + self.assertIsInstance(actual, Wikicode) + length = len(expected.nodes) + self.assertEqual(length, len(actual.nodes)) + for i in range(length): + self.assertNodeEqual(expected.get(i), actual.get(i)) diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index 7a082e8..955b9a0 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -31,7 +31,7 @@ except ImportError: from ._test_tokenizer import TokenizerTestCase @unittest.skipUnless(CTokenizer, "C tokenizer not available") -class TestCTokenizer(TokenizerTestCase, unittest.TestCase): +class TestCTokenizer(TokenizerTestCase): """Test cases for the C tokenizer.""" @classmethod diff --git a/tests/test_parser.py b/tests/test_parser.py index 1c37a85..9d2c969 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -29,41 +29,12 @@ from mwparserfromhell.nodes.extras import Parameter from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode +from ._test_tree_equality import TreeEqualityTestCase from .compat import range -class TestParser(unittest.TestCase): +class TestParser(TreeEqualityTestCase): """Tests for the Parser class itself, which tokenizes and builds nodes.""" - def assertNodesEqual(self, expected, actual): - """Assert that two Nodes are the same type and have the same data.""" - self.assertIs(type(expected), type(actual)) - if isinstance(expected, Text): - self.assertEqual(expected.value, actual.value) - elif isinstance(expected, Template): - self.assertWikicodeEqual(expected.name, actual.name) - length = len(expected.params) - self.assertEqual(length, len(actual.params)) - for i in range(length): - exp_param = expected.params[i] - act_param = actual.params[i] - self.assertWikicodeEqual(exp_param.name, act_param.name) - self.assertWikicodeEqual(exp_param.value, act_param.value) - self.assertIs(exp_param.showkey, act_param.showkey) - elif isinstance(expected, Wikilink): - self.assertWikicodeEqual(expected.title, actual.title) - if expected.text is not None: - self.assertWikicodeEqual(expected.text, actual.text) - else: - self.assertIs(None, actual.text) - - def 
assertWikicodeEqual(self, expected, actual): - """Assert that two Wikicode objects have the same data.""" - self.assertIsInstance(actual, Wikicode) - length = len(expected.nodes) - self.assertEqual(length, len(actual.nodes)) - for i in range(length): - self.assertNodesEqual(expected.get(i), actual.get(i)) - def test_use_c(self): """make sure the correct tokenizer is used""" if parser.use_c: diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index 697c7e5..7b37eb3 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -27,7 +27,7 @@ from mwparserfromhell.parser.tokenizer import Tokenizer from ._test_tokenizer import TokenizerTestCase -class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): +class TestPyTokenizer(TokenizerTestCase): """Test cases for the Python tokenizer.""" @classmethod From a8cb275b941b70524e8b97341784097434ae627c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 1 Apr 2013 19:04:55 -0400 Subject: [PATCH 117/180] Add TestUtils; implement two tests for it. Also, add a missing docstring in TreeEqualityTestCase. --- tests/_test_tree_equality.py | 1 + tests/test_utils.py | 67 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 tests/test_utils.py diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index 26c373d..0fdb531 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -35,6 +35,7 @@ class TreeEqualityTestCase(TestCase): """ def assertNodeEqual(self, expected, actual): + """Assert that two Nodes have the same type and have the same data.""" registry = { Text: self.assertTextNodeEqual, Template: self.assertTemplateNodeEqual, diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..8afad7a --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,67 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
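+#
+# The tests below exercise utils.parse_anything(), which normalizes any
+# reasonable input -- strings, bytes, numbers, None, existing nodes, and
+# arbitrarily nested iterables of these -- into a single Wikicode tree,
+# and raises ValueError for everything else.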
+ +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.nodes import Template, Text +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.utils import parse_anything +from mwparserfromhell.wikicode import Wikicode + +from ._test_tree_equality import TreeEqualityTestCase + +class TestUtils(TreeEqualityTestCase): + """Tests for the utils module, which provides parse_anything().""" + + def test_parse_anything_valid(self): + """tests for valid input to utils.parse_anything()""" + wrap = lambda L: Wikicode(SmartList(L)) + textify = lambda L: wrap([Text(item) for item in L]) + tests = [ + (wrap([Text("foobar")]), textify(["foobar"])), + (Template(wrap([Text("spam")])), + wrap([Template(textify(["spam"]))])), + ("fóóbar", textify(["fóóbar"])), + (b"foobár", textify(["foobár"])), + (123, textify(["123"])), + (True, textify(["True"])), + (None, wrap([])), + ([Text("foo"), Text("bar"), Text("baz")], + textify(["foo", "bar", "baz"])), + ([wrap([Text("foo")]), Text("bar"), "baz", 123, 456], + textify(["foo", "bar", "baz", "123", "456"])), + ([[[([[((("foo",),),)], "bar"],)]]], textify(["foo", "bar"])) + ] + for test, valid in tests: + self.assertWikicodeEqual(valid, parse_anything(test)) + + def test_parse_anything_invalid(self): + """tests for invalid input to utils.parse_anything()""" + self.assertRaises(ValueError, parse_anything, Ellipsis) + self.assertRaises(ValueError, parse_anything, object) + self.assertRaises(ValueError, parse_anything, object()) + self.assertRaises(ValueError, parse_anything, type) + self.assertRaises(ValueError, parse_anything, ["foo", [object]]) + +if __name__ == "__main__": + unittest.main(verbosity=2) From 30d4f137a829a7bfd613363f3579f97337462024 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 1 Apr 2013 19:06:59 -0400 Subject: [PATCH 118/180] Curse you, Python 3! --- tests/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 8afad7a..c088530 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -42,7 +42,7 @@ class TestUtils(TreeEqualityTestCase): (Template(wrap([Text("spam")])), wrap([Template(textify(["spam"]))])), ("fóóbar", textify(["fóóbar"])), - (b"foobár", textify(["foobár"])), + (b"foob\xc3\xa1r", textify(["foobár"])), (123, textify(["123"])), (True, textify(["True"])), (None, wrap([])), From cda1ce95f3b46c3392e57de182bc925c815b7d1f Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 1 Apr 2013 19:11:30 -0400 Subject: [PATCH 119/180] Roll back part of e3f89af62d because CURSE YOU UNIT TESTING FRAMEWORK --- tests/_test_tokenizer.py | 3 +-- tests/test_ctokenizer.py | 2 +- tests/test_pytokenizer.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 13882aa..379b4fa 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -21,7 +21,6 @@ # SOFTWARE. from __future__ import print_function, unicode_literals -from unittest import TestCase from os import listdir, path from mwparserfromhell.compat import py3k @@ -32,7 +31,7 @@ class _TestParseError(Exception): pass -class TokenizerTestCase(TestCase): +class TokenizerTestCase(object): """A base test case for tokenizers, whose tests are loaded dynamically. 
Subclassed along with unittest.TestCase to form TestPyTokenizer and diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index 955b9a0..7a082e8 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -31,7 +31,7 @@ except ImportError: from ._test_tokenizer import TokenizerTestCase @unittest.skipUnless(CTokenizer, "C tokenizer not available") -class TestCTokenizer(TokenizerTestCase): +class TestCTokenizer(TokenizerTestCase, unittest.TestCase): """Test cases for the C tokenizer.""" @classmethod diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index 7b37eb3..697c7e5 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -27,7 +27,7 @@ from mwparserfromhell.parser.tokenizer import Tokenizer from ._test_tokenizer import TokenizerTestCase -class TestPyTokenizer(TokenizerTestCase): +class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): """Test cases for the Python tokenizer.""" @classmethod From 892092434fa748ef06ff2558c5b9dbfce9155071 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 1 Apr 2013 21:04:53 -0400 Subject: [PATCH 120/180] Skeleton for TestBuilder; adding some nodes to TreeEqualityTestCase. --- tests/_test_tree_equality.py | 38 ++++++++++++++++++++++++++++++------ tests/test_builder.py | 46 ++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 76 insertions(+), 8 deletions(-) diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index 0fdb531..16f4b49 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -23,8 +23,9 @@ from __future__ import unicode_literals from unittest import TestCase -from mwparserfromhell.nodes import Template, Text, Wikilink -from mwparserfromhell.nodes.extras import Parameter +from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, + Tag, Template, Text, Wikilink) +from mwparserfromhell.nodes.extras import Attribute, Parameter from mwparserfromhell.wikicode import Wikicode class TreeEqualityTestCase(TestCase): @@ -37,8 +38,13 @@ class TreeEqualityTestCase(TestCase): def assertNodeEqual(self, expected, actual): """Assert that two Nodes have the same type and have the same data.""" registry = { - Text: self.assertTextNodeEqual, + Argument: self.assertArgumentNodeEqual, + Comment: self.assertCommentNodeEqual, + Heading: self.assertHeadingNodeEqual, + HTMLEntity: self.assertHTMLEntityNodeEqual, + Tag: self.assertTagNodeEqual, Template: self.assertTemplateNodeEqual, + Text: self.assertTextNodeEqual, Wikilink: self.assertWikilinkNodeEqual } for nodetype in registry: @@ -46,9 +52,25 @@ class TreeEqualityTestCase(TestCase): self.assertIsInstance(actual, nodetype) registry[nodetype](expected, actual) - def assertTextNodeEqual(self, expected, actual): - """Assert that two Text nodes have the same data.""" - self.assertEqual(expected.value, actual.value) + def assertArgumentNodeEqual(self, expected, actual): + """Assert that two Argument nodes have the same data.""" + pass + + def assertCommentNodeEqual(self, expected, actual): + """Assert that two Comment nodes have the same data.""" + pass + + def assertHeadingNodeEqual(self, expected, actual): + """Assert that two Heading nodes have the same data.""" + pass + + def assertHTMLEntityNodeEqual(self, expected, actual): + """Assert that two HTMLEntity nodes have the same data.""" + pass + + def assertTagNodeEqual(self, expected, actual): + """Assert that two Tag nodes have the same data.""" + pass def assertTemplateNodeEqual(self, expected, actual): """Assert that two Template 
nodes have the same data.""" @@ -62,6 +84,10 @@ class TreeEqualityTestCase(TestCase): self.assertWikicodeEqual(exp_param.value, act_param.value) self.assertIs(exp_param.showkey, act_param.showkey) + def assertTextNodeEqual(self, expected, actual): + """Assert that two Text nodes have the same data.""" + self.assertEqual(expected.value, actual.value) + def assertWikilinkNodeEqual(self, expected, actual): """Assert that two Wikilink nodes have the same data.""" self.assertWikicodeEqual(expected.title, actual.title) diff --git a/tests/test_builder.py b/tests/test_builder.py index a3518fd..a80d8bf 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -23,8 +23,50 @@ from __future__ import unicode_literals import unittest -class TestBuilder(unittest.TestCase): - pass +from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, + Tag, Template, Text, Wikilink) +from mwparserfromhell.nodes.extras import Attribute, Parameter +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.wikicode import Wikicode + +from ._test_tree_equality import TreeEqualityTestCase + +wrap = lambda L: Wikicode(SmartList(L)) + +class TestBuilder(TreeEqualityTestCase): + """Tests for the builder, which turns tokens into Wikicode objects.""" + + def test_text(self): + """tests for building Text nodes""" + pass + + def test_template(self): + """tests for building Template nodes""" + pass + + def test_argument(self): + """tests for building Argument nodes""" + pass + + def test_wikilink(self): + """tests for building Wikilink nodes""" + pass + + def test_html_entity(self): + """tests for building HTMLEntity nodes""" + pass + + def test_heading(self): + """tests for building Heading nodes""" + pass + + def test_comment(self): + """tests for building Comment nodes""" + pass + + def test_tag(self): + """tests for building Tag nodes""" + pass if __name__ == "__main__": unittest.main(verbosity=2) From 404b4479a26ab89f41b2e9bae5c6ffc8d5777f67 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 1 Apr 2013 21:30:19 -0400 Subject: [PATCH 121/180] Implement the remaining asserts in TreeEqualityTestCase. 
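
This commit also tightens HTMLEntity.hex_char: instead of coercing its input to a bool, the setter now accepts only "x" or "X" and raises ValueError for anything else. Roughly, the new behaviour looks like this (an illustrative sketch, not part of the diff below)::

    from mwparserfromhell.nodes import HTMLEntity

    entity = HTMLEntity("6b", named=False, hexadecimal=True)
    entity.hex_char = "X"      # accepted; the entity now renders as &#X6b;
    try:
        entity.hex_char = "q"  # anything besides "x"/"X" is rejected
    except ValueError:
        print("rejected, as expected")
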
--- mwparserfromhell/nodes/html_entity.py | 5 ++++- tests/_test_tree_equality.py | 18 +++++++++++++----- tests/test_builder.py | 1 + 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/mwparserfromhell/nodes/html_entity.py b/mwparserfromhell/nodes/html_entity.py index 221040b..5b7607c 100644 --- a/mwparserfromhell/nodes/html_entity.py +++ b/mwparserfromhell/nodes/html_entity.py @@ -135,7 +135,10 @@ class HTMLEntity(Node): @hex_char.setter def hex_char(self, newval): - self._hex_char = bool(newval) + newval = str(newval) + if newval not in ("x", "X"): + raise ValueError(newval) + self._hex_char = newval def normalize(self): """Return the unicode character represented by the HTML entity.""" diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index 16f4b49..2014ac1 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -54,23 +54,31 @@ class TreeEqualityTestCase(TestCase): def assertArgumentNodeEqual(self, expected, actual): """Assert that two Argument nodes have the same data.""" - pass + self.assertWikicodeEqual(expected.name, actual.name) + if expected.default is not None: + self.assertWikicodeEqual(expected.default, actual.default) + else: + self.assertIs(None, actual.default) def assertCommentNodeEqual(self, expected, actual): """Assert that two Comment nodes have the same data.""" - pass + self.assertWikicodeEqual(expected.contents, actual.contents) def assertHeadingNodeEqual(self, expected, actual): """Assert that two Heading nodes have the same data.""" - pass + self.assertWikicodeEqual(expected.title, actual.title) + self.assertEqual(expected.level, actual.level) def assertHTMLEntityNodeEqual(self, expected, actual): """Assert that two HTMLEntity nodes have the same data.""" - pass + self.assertEqual(expected.value, actual.value) + self.assertIs(expected.named, actual.named) + self.assertIs(expected.hexadecimal, actual.hexadecimal) + self.assertEquals(expected.hex_char, actual.hex_char) def assertTagNodeEqual(self, expected, actual): """Assert that two Tag nodes have the same data.""" - pass + self.fail("Holding this until feature/html_tags is ready.") def assertTemplateNodeEqual(self, expected, actual): """Assert that two Template nodes have the same data.""" diff --git a/tests/test_builder.py b/tests/test_builder.py index a80d8bf..e6919c1 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -64,6 +64,7 @@ class TestBuilder(TreeEqualityTestCase): """tests for building Comment nodes""" pass + @unittest.skip("holding this until feature/html_tags is ready") def test_tag(self): """tests for building Tag nodes""" pass From cb23587ab6e4cb3dfc21d817f2cb7b18c5542a60 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 3 Apr 2013 11:00:07 -0400 Subject: [PATCH 122/180] Adding some Builder tests --- tests/test_builder.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/tests/test_builder.py b/tests/test_builder.py index e6919c1..d577bfc 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -26,6 +26,8 @@ import unittest from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, Tag, Template, Text, Wikilink) from mwparserfromhell.nodes.extras import Attribute, Parameter +from mwparserfromhell.parser import tokens +from mwparserfromhell.parser.builder import Builder from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode @@ -36,13 +38,34 @@ wrap = lambda L: Wikicode(SmartList(L)) class 
TestBuilder(TreeEqualityTestCase): """Tests for the builder, which turns tokens into Wikicode objects.""" + def setUp(self): + self.builder = Builder() + def test_text(self): """tests for building Text nodes""" - pass + tests = [ + ([tokens.Text(text="foobar")], wrap([Text("foobar")])), + ([tokens.Text(text="fóóbar")], wrap([Text("fóóbar")])), + ([tokens.Text(text="spam"), tokens.Text(text="eggs")], + wrap([Text("spam"), Text("eggs")])), + ] + for test, valid in tests: + self.assertWikicodeEqual(valid, self.builder.build(test)) def test_template(self): """tests for building Template nodes""" - pass + tests = [ + ([tokens.TemplateOpen(), tokens.Text(text="foobar"), tokens.TemplateClose()], + wrap([Template(wrap([Text("foobar")]))])), + ([tokens.TemplateOpen(), tokens.Text(text="spam"), tokens.Text(text="eggs"), tokens.TemplateClose()], + wrap([Template(wrap([Text("spam"), Text("eggs")]))])), + ([tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateParamSeparator(), tokens.Text(text="bar"), tokens.TemplateClose()], + wrap([Template(wrap([Text("foo")]), params=[Parameter(wrap([Text("1")]), wrap([Text("bar")]), showkey=False)])])), + ([tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateParamSeparator(), tokens.Text(text="bar"), tokens.TemplateParamEquals(), tokens.Text(text="baz"), tokens.TemplateClose()], + wrap([Template(wrap([Text("foo")]), params=[Parameter(wrap([Text("bar")]), wrap([Text("baz")]))])])), + ] + for test, valid in tests: + self.assertWikicodeEqual(valid, self.builder.build(test)) def test_argument(self): """tests for building Argument nodes""" From b8e8d057abc4fefec78f967adf30326669c0726c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 4 Apr 2013 10:49:04 -0400 Subject: [PATCH 123/180] Finish test_template() --- tests/test_builder.py | 50 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/tests/test_builder.py b/tests/test_builder.py index d577bfc..952b501 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -47,7 +47,7 @@ class TestBuilder(TreeEqualityTestCase): ([tokens.Text(text="foobar")], wrap([Text("foobar")])), ([tokens.Text(text="fóóbar")], wrap([Text("fóóbar")])), ([tokens.Text(text="spam"), tokens.Text(text="eggs")], - wrap([Text("spam"), Text("eggs")])), + wrap([Text("spam"), Text("eggs")])), ] for test, valid in tests: self.assertWikicodeEqual(valid, self.builder.build(test)) @@ -55,14 +55,46 @@ class TestBuilder(TreeEqualityTestCase): def test_template(self): """tests for building Template nodes""" tests = [ - ([tokens.TemplateOpen(), tokens.Text(text="foobar"), tokens.TemplateClose()], - wrap([Template(wrap([Text("foobar")]))])), - ([tokens.TemplateOpen(), tokens.Text(text="spam"), tokens.Text(text="eggs"), tokens.TemplateClose()], - wrap([Template(wrap([Text("spam"), Text("eggs")]))])), - ([tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateParamSeparator(), tokens.Text(text="bar"), tokens.TemplateClose()], - wrap([Template(wrap([Text("foo")]), params=[Parameter(wrap([Text("1")]), wrap([Text("bar")]), showkey=False)])])), - ([tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateParamSeparator(), tokens.Text(text="bar"), tokens.TemplateParamEquals(), tokens.Text(text="baz"), tokens.TemplateClose()], - wrap([Template(wrap([Text("foo")]), params=[Parameter(wrap([Text("bar")]), wrap([Text("baz")]))])])), + ([tokens.TemplateOpen(), tokens.Text(text="foobar"), + tokens.TemplateClose()], + wrap([Template(wrap([Text("foobar")]))])), + + 
([tokens.TemplateOpen(), tokens.Text(text="spam"), + tokens.Text(text="eggs"), tokens.TemplateClose()], + wrap([Template(wrap([Text("spam"), Text("eggs")]))])), + + ([tokens.TemplateOpen(), tokens.Text(text="foo"), + tokens.TemplateParamSeparator(), tokens.Text(text="bar"), + tokens.TemplateClose()], + wrap([Template(wrap([Text("foo")]), params=[ + Parameter(wrap([Text("1")]), wrap([Text("bar")]), + showkey=False)])])), + + ([tokens.TemplateOpen(), tokens.Text(text="foo"), + tokens.TemplateParamSeparator(), tokens.Text(text="bar"), + tokens.TemplateParamEquals(), tokens.Text(text="baz"), + tokens.TemplateClose()], + wrap([Template(wrap([Text("foo")]), params=[ + Parameter(wrap([Text("bar")]), wrap([Text("baz")]))])])), + + ([tokens.TemplateOpen(), tokens.Text(text="foo"), + tokens.TemplateParamSeparator(), tokens.Text(text="bar"), + tokens.TemplateParamEquals(), tokens.Text(text="baz"), + tokens.TemplateParamSeparator(), tokens.Text(text="biz"), + tokens.TemplateParamSeparator(), tokens.Text(text="buzz"), + tokens.TemplateParamSeparator(), tokens.Text(text="3"), + tokens.TemplateParamEquals(), tokens.Text(text="buff"), + tokens.TemplateParamSeparator(), tokens.Text(text="baff"), + tokens.TemplateClose()], + wrap([Template(wrap([Text("foo")]), params=[ + Parameter(wrap([Text("bar")]), wrap([Text("baz")])), + Parameter(wrap([Text("1")]), wrap([Text("biz")]), + showkey=False), + Parameter(wrap([Text("2")]), wrap([Text("buzz")]), + showkey=False), + Parameter(wrap([Text("3")]), wrap([Text("buff")])), + Parameter(wrap([Text("3")]), wrap([Text("baff")]), + showkey=False)])])), ] for test, valid in tests: self.assertWikicodeEqual(valid, self.builder.build(test)) From e32a6692f8ad9f8d6c57a56ca40e8aedf128c074 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 4 Apr 2013 10:59:16 -0400 Subject: [PATCH 124/180] test_argument() --- tests/test_builder.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/tests/test_builder.py b/tests/test_builder.py index 952b501..e632644 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -101,7 +101,29 @@ class TestBuilder(TreeEqualityTestCase): def test_argument(self): """tests for building Argument nodes""" - pass + tests = [ + ([tokens.ArgumentOpen(), tokens.Text(text="foobar"), + tokens.ArgumentClose()], + wrap([Argument(wrap([Text("foobar")]))])), + + ([tokens.ArgumentOpen(), tokens.Text(text="spam"), + tokens.Text(text="eggs"), tokens.ArgumentClose()], + wrap([Argument(wrap([Text("spam"), Text("eggs")]))])), + + ([tokens.ArgumentOpen(), tokens.Text(text="foo"), + tokens.ArgumentSeparator(), tokens.Text(text="bar"), + tokens.ArgumentClose()], + wrap([Argument(wrap([Text("foo")]), wrap([Text("bar")]))])), + + ([tokens.ArgumentOpen(), tokens.Text(text="foo"), + tokens.Text(text="bar"), tokens.ArgumentSeparator(), + tokens.Text(text="baz"), tokens.Text(text="biz"), + tokens.ArgumentClose()], + wrap([Argument(wrap([Text("foo"), Text("bar")]), + wrap([Text("baz"), Text("biz")]))])), + ] + for test, valid in tests: + self.assertWikicodeEqual(valid, self.builder.build(test)) def test_wikilink(self): """tests for building Wikilink nodes""" From 7289d8c070a6fcd2bceaa8e00e7661c9c21461a5 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 5 Apr 2013 10:25:48 -0400 Subject: [PATCH 125/180] test_wikilink(); fix indentation --- tests/test_builder.py | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/tests/test_builder.py b/tests/test_builder.py index e632644..ea38dae 
100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -68,7 +68,7 @@ class TestBuilder(TreeEqualityTestCase): tokens.TemplateClose()], wrap([Template(wrap([Text("foo")]), params=[ Parameter(wrap([Text("1")]), wrap([Text("bar")]), - showkey=False)])])), + showkey=False)])])), ([tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateParamSeparator(), tokens.Text(text="bar"), @@ -89,12 +89,12 @@ class TestBuilder(TreeEqualityTestCase): wrap([Template(wrap([Text("foo")]), params=[ Parameter(wrap([Text("bar")]), wrap([Text("baz")])), Parameter(wrap([Text("1")]), wrap([Text("biz")]), - showkey=False), + showkey=False), Parameter(wrap([Text("2")]), wrap([Text("buzz")]), - showkey=False), + showkey=False), Parameter(wrap([Text("3")]), wrap([Text("buff")])), Parameter(wrap([Text("3")]), wrap([Text("baff")]), - showkey=False)])])), + showkey=False)])])), ] for test, valid in tests: self.assertWikicodeEqual(valid, self.builder.build(test)) @@ -127,7 +127,29 @@ class TestBuilder(TreeEqualityTestCase): def test_wikilink(self): """tests for building Wikilink nodes""" - pass + tests = [ + ([tokens.WikilinkOpen(), tokens.Text(text="foobar"), + tokens.WikilinkClose()], + wrap([Wikilink(wrap([Text("foobar")]))])), + + ([tokens.WikilinkOpen(), tokens.Text(text="spam"), + tokens.Text(text="eggs"), tokens.WikilinkClose()], + wrap([Wikilink(wrap([Text("spam"), Text("eggs")]))])), + + ([tokens.WikilinkOpen(), tokens.Text(text="foo"), + tokens.WikilinkSeparator(), tokens.Text(text="bar"), + tokens.WikilinkClose()], + wrap([Wikilink(wrap([Text("foo")]), wrap([Text("bar")]))])), + + ([tokens.WikilinkOpen(), tokens.Text(text="foo"), + tokens.Text(text="bar"), tokens.WikilinkSeparator(), + tokens.Text(text="baz"), tokens.Text(text="biz"), + tokens.WikilinkClose()], + wrap([Wikilink(wrap([Text("foo"), Text("bar")]), + wrap([Text("baz"), Text("biz")]))])), + ] + for test, valid in tests: + self.assertWikicodeEqual(valid, self.builder.build(test)) def test_html_entity(self): """tests for building HTMLEntity nodes""" From e9463543f46c49748740f69c5e5bcdb569338a2a Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 5 Apr 2013 10:46:43 -0400 Subject: [PATCH 126/180] test_html_entity() --- tests/test_builder.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/tests/test_builder.py b/tests/test_builder.py index ea38dae..7dcbc0e 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -153,7 +153,23 @@ class TestBuilder(TreeEqualityTestCase): def test_html_entity(self): """tests for building HTMLEntity nodes""" - pass + tests = [ + ([tokens.HTMLEntityStart(), tokens.Text(text="nbsp"), + tokens.HTMLEntityEnd()], + wrap([HTMLEntity("nbsp", named=True, hexadecimal=False)])), + + ([tokens.HTMLEntityStart(), tokens.HTMLEntityNumeric(), + tokens.Text(text="107"), tokens.HTMLEntityEnd()], + wrap([HTMLEntity("107", named=False, hexadecimal=False)])), + + ([tokens.HTMLEntityStart(), tokens.HTMLEntityNumeric(), + tokens.HTMLEntityHex(char="X"), tokens.Text(text="6B"), + tokens.HTMLEntityEnd()], + wrap([HTMLEntity("6B", named=False, hexadecimal=True, + hex_char="X")])), + ] + for test, valid in tests: + self.assertWikicodeEqual(valid, self.builder.build(test)) def test_heading(self): """tests for building Heading nodes""" From 132c6584d059497374c7f0c53285e6251beb6675 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 5 Apr 2013 10:52:43 -0400 Subject: [PATCH 127/180] test_heading() and test_comment() --- tests/test_builder.py | 24 ++++++++++++++++++++++-- 1 file 
changed, 22 insertions(+), 2 deletions(-) diff --git a/tests/test_builder.py b/tests/test_builder.py index 7dcbc0e..410eb4a 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -173,11 +173,31 @@ class TestBuilder(TreeEqualityTestCase): def test_heading(self): """tests for building Heading nodes""" - pass + tests = [ + ([tokens.HeadingStart(level=2), tokens.Text(text="foobar"), + tokens.HeadingEnd()], + wrap([Heading(wrap([Text("foobar")]), 2)])), + + ([tokens.HeadingStart(level=4), tokens.Text(text="spam"), + tokens.Text(text="eggs"), tokens.HeadingEnd()], + wrap([Heading(wrap([Text("spam"), Text("eggs")]), 4)])), + ] + for test, valid in tests: + self.assertWikicodeEqual(valid, self.builder.build(test)) def test_comment(self): """tests for building Comment nodes""" - pass + tests = [ + ([tokens.CommentStart(), tokens.Text(text="foobar"), + tokens.CommentEnd()], + wrap([Comment(wrap([Text("foobar")]))])), + + ([tokens.CommentStart(), tokens.Text(text="spam"), + tokens.Text(text="eggs"), tokens.CommentEnd()], + wrap([Comment(wrap([Text("spam"), Text("eggs")]))])), + ] + for test, valid in tests: + self.assertWikicodeEqual(valid, self.builder.build(test)) @unittest.skip("holding this until feature/html_tags is ready") def test_tag(self): From 094e867ee6d7a2f34c6555e318ccdb1622526484 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 6 Apr 2013 15:45:51 -0400 Subject: [PATCH 128/180] Add test_integration(); add a horrible abuse of PEP8 --- tests/_test_tree_equality.py | 2 +- tests/test_builder.py | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index 2014ac1..758a72e 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -74,7 +74,7 @@ class TreeEqualityTestCase(TestCase): self.assertEqual(expected.value, actual.value) self.assertIs(expected.named, actual.named) self.assertIs(expected.hexadecimal, actual.hexadecimal) - self.assertEquals(expected.hex_char, actual.hex_char) + self.assertEqual(expected.hex_char, actual.hex_char) def assertTagNodeEqual(self, expected, actual): """Assert that two Tag nodes have the same data.""" diff --git a/tests/test_builder.py b/tests/test_builder.py index 410eb4a..9425713 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -204,5 +204,25 @@ class TestBuilder(TreeEqualityTestCase): """tests for building Tag nodes""" pass + def test_integration(self): + """a test for building a combination of templates together""" + test = [tokens.TemplateOpen(), tokens.TemplateOpen(), + tokens.TemplateOpen(), tokens.TemplateOpen(), + tokens.Text(text="foo"), tokens.TemplateClose(), + tokens.Text(text="bar"), tokens.TemplateParamSeparator(), + tokens.Text(text="baz"), tokens.TemplateParamEquals(), + tokens.Text(text="biz"), tokens.TemplateClose(), + tokens.Text(text="buzz"), tokens.TemplateClose(), + tokens.Text(text="usr"), tokens.TemplateParamSeparator(), + tokens.TemplateOpen(), tokens.Text(text="bin"), + tokens.TemplateClose(), tokens.TemplateClose()] + valid = wrap( + [Template(wrap([Template(wrap([Template(wrap([Template(wrap([Text( + "foo")])), Text("bar")]), params=[Parameter(wrap([Text("baz")]), + wrap([Text("biz")]))]), Text("buzz")])), Text("usr")]), params=[ + Parameter(wrap([Text("1")]), wrap([Template(wrap([Text("bin")]))]), + showkey=False)])]) + self.assertWikicodeEqual(valid, self.builder.build(test)) + if __name__ == "__main__": unittest.main(verbosity=2) From 2d9b8a39b6509d8a39dcf12b90dbcb2e8f07433f Mon Sep 
17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 6 Apr 2013 16:17:47 -0400 Subject: [PATCH 129/180] test_integration2(); finish TestBuilder --- tests/test_builder.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tests/test_builder.py b/tests/test_builder.py index 9425713..1e578ed 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -206,6 +206,7 @@ class TestBuilder(TreeEqualityTestCase): def test_integration(self): """a test for building a combination of templates together""" + # {{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}} test = [tokens.TemplateOpen(), tokens.TemplateOpen(), tokens.TemplateOpen(), tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateClose(), @@ -224,5 +225,37 @@ class TestBuilder(TreeEqualityTestCase): showkey=False)])]) self.assertWikicodeEqual(valid, self.builder.build(test)) + def test_integration2(self): + """an even more audacious test for building a horrible wikicode mess""" + # {{a|b|{{c|[[d]]{{{e}}}}}}}[[f|{{{g}}}<!--h-->]]{{i|j=&nbsp;}} + test = [tokens.TemplateOpen(), tokens.Text(text="a"), + tokens.TemplateParamSeparator(), tokens.Text(text="b"), + tokens.TemplateParamSeparator(), tokens.TemplateOpen(), + tokens.Text(text="c"), tokens.TemplateParamSeparator(), + tokens.WikilinkOpen(), tokens.Text(text="d"), + tokens.WikilinkClose(), tokens.ArgumentOpen(), + tokens.Text(text="e"), tokens.ArgumentClose(), + tokens.TemplateClose(), tokens.TemplateClose(), + tokens.WikilinkOpen(), tokens.Text(text="f"), + tokens.WikilinkSeparator(), tokens.ArgumentOpen(), + tokens.Text(text="g"), tokens.ArgumentClose(), + tokens.CommentStart(), tokens.Text(text="h"), + tokens.CommentEnd(), tokens.WikilinkClose(), + tokens.TemplateOpen(), tokens.Text(text="i"), + tokens.TemplateParamSeparator(), tokens.Text(text="j"), + tokens.TemplateParamEquals(), tokens.HTMLEntityStart(), + tokens.Text(text="nbsp"), tokens.HTMLEntityEnd(), + tokens.TemplateClose()] + valid = wrap( + [Template(wrap([Text("a")]), params=[Parameter(wrap([Text("1")]), + wrap([Text("b")]), showkey=False), Parameter(wrap([Text("2")]), + wrap([Template(wrap([Text("c")]), params=[Parameter(wrap([Text("1") + ]), wrap([Wikilink(wrap([Text("d")])), Argument(wrap([Text("e")]))] + ), showkey=False)])]), showkey=False)]), Wikilink(wrap([Text("f")] + ), wrap([Argument(wrap([Text("g")])), Comment(wrap([Text("h")]))]) + ), Template(wrap([Text("i")]), params=[Parameter(wrap([Text("j")]), + wrap([HTMLEntity("nbsp", named=True)]))])]) + self.assertWikicodeEqual(valid, self.builder.build(test)) + if __name__ == "__main__": unittest.main(verbosity=2) From b0e3cd9cae58a0ac8490d2ee0c9b87e05de456b5 Mon Sep 17 00:00:00 2001 From: Kunal Mehta Date: Thu, 18 Apr 2013 18:23:08 -0500 Subject: [PATCH 130/180] Fix Pywikipedia references in documentation --- docs/integration.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/integration.rst b/docs/integration.rst index d0e54db..bd4e0ac 100644 --- a/docs/integration.rst +++ b/docs/integration.rst @@ -7,12 +7,12 @@ Integration :py:func:`mwparserfromhell.parse() <mwparserfromhell.parse>` on :py:meth:`~earwigbot.wiki.page.Page.get`.
-If you're using PyWikipedia_, your code might look like this:: +If you're using Pywikipedia_, your code might look like this:: import mwparserfromhell import wikipedia as pywikibot def parse(title): - site = pywikibot.get_site() + site = pywikibot.getSite() page = pywikibot.Page(site, title) text = page.get() return mwparserfromhell.parse(text) @@ -31,5 +31,5 @@ following code (via the API_):: return mwparserfromhell.parse(text) .. _EarwigBot: https://github.com/earwig/earwigbot -.. _PyWikipedia: http://pywikipediabot.sourceforge.net/ +.. _Pywikipedia: http://pywikipediabot.sourceforge.net/ .. _API: http://mediawiki.org/wiki/API From 9c7517b22a6ff0f0ab8834b2e39bf56d886d6989 Mon Sep 17 00:00:00 2001 From: Kunal Mehta Date: Thu, 18 Apr 2013 18:32:51 -0500 Subject: [PATCH 131/180] Link to mediawiki.org instead of sf.net --- docs/integration.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/integration.rst b/docs/integration.rst index bd4e0ac..78810b8 100644 --- a/docs/integration.rst +++ b/docs/integration.rst @@ -31,5 +31,5 @@ following code (via the API_):: return mwparserfromhell.parse(text) .. _EarwigBot: https://github.com/earwig/earwigbot -.. _Pywikipedia: http://pywikipediabot.sourceforge.net/ +.. _Pywikipedia: https://www.mediawiki.org/wiki/Manual:Pywikipediabot .. _API: http://mediawiki.org/wiki/API From 6e399275263af3feae4bcd43ae17ccd5c7d2d1b6 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 18 Apr 2013 20:06:27 -0400 Subject: [PATCH 132/180] Update README with same changes (#27) --- README.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 90e896f..77262ca 100644 --- a/README.rst +++ b/README.rst @@ -107,12 +107,12 @@ Integration ``Page`` objects have a ``parse`` method that essentially calls ``mwparserfromhell.parse()`` on ``page.get()``. -If you're using PyWikipedia_, your code might look like this:: +If you're using Pywikipedia_, your code might look like this:: import mwparserfromhell import wikipedia as pywikibot def parse(title): - site = pywikibot.get_site() + site = pywikibot.getSite() page = pywikibot.Page(site, title) text = page.get() return mwparserfromhell.parse(text) @@ -138,5 +138,5 @@ following code (via the API_):: .. _Python Package Index: http://pypi.python.org .. _get pip: http://pypi.python.org/pypi/pip .. _EarwigBot: https://github.com/earwig/earwigbot -.. _PyWikipedia: http://pywikipediabot.sourceforge.net/ +.. _Pywikipedia: https://www.mediawiki.org/wiki/Manual:Pywikipediabot .. _API: http://mediawiki.org/wiki/API From 8db40689edf51d6febfaae3340fc6af6d34329ad Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 20 Apr 2013 17:59:20 -0400 Subject: [PATCH 133/180] Improve a few things about nodes; simplify a method in Wikicode. --- mwparserfromhell/nodes/html_entity.py | 47 ++++++++++++++++++++++++++++------- mwparserfromhell/nodes/wikilink.py | 5 +++- mwparserfromhell/wikicode.py | 8 +----- 3 files changed, 43 insertions(+), 17 deletions(-) diff --git a/mwparserfromhell/nodes/html_entity.py b/mwparserfromhell/nodes/html_entity.py index 5b7607c..1bf1c78 100644 --- a/mwparserfromhell/nodes/html_entity.py +++ b/mwparserfromhell/nodes/html_entity.py @@ -63,7 +63,8 @@ class HTMLEntity(Node): return self.normalize() return self - def _unichr(self, value): + @staticmethod + def _unichr(value): """Implement the builtin unichr() with support for non-BMP code points. On wide Python builds, this functions like the normal unichr().
On @@ -119,19 +120,47 @@ class HTMLEntity(Node): @value.setter def value(self, newval): newval = str(newval) - if newval not in htmlentities.entitydefs: - test = int(self.value, 16) - if test < 0 or (test > 0x10FFFF and int(self.value) > 0x10FFFF): - raise ValueError(newval) + try: + int(newval) + except ValueError: + try: + int(newval, 16) + except ValueError: + if newval not in htmlentities.entitydefs: + raise ValueError("entity value is not a valid name") + self._named = True + self._hexadecimal = False + else: + if int(newval, 16) < 0 or int(newval, 16) > 0x10FFFF: + raise ValueError("entity value is not in range(0x110000)") + self._named = False + self._hexadecimal = True + else: + test = int(newval, 16 if self.hexadecimal else 10) + if test < 0 or test > 0x10FFFF: + raise ValueError("entity value is not in range(0x110000)") + self._named = False self._value = newval @named.setter def named(self, newval): - self._named = bool(newval) + newval = bool(newval) + if newval and self.value not in htmlentities.entitydefs: + raise ValueError("entity value is not a valid name") + if not newval: + try: + int(self.value, 16) + except ValueError: + err = "current entity value is not a valid Unicode codepoint" + raise ValueError(err) + self._named = newval @hexadecimal.setter def hexadecimal(self, newval): - self._hexadecimal = bool(newval) + newval = bool(newval) + if newval and self.named: + raise ValueError("a named entity cannot be hexadecimal") + self._hexadecimal = newval @hex_char.setter def hex_char(self, newval): @@ -145,5 +174,5 @@ class HTMLEntity(Node): if self.named: return unichr(htmlentities.name2codepoint[self.value]) if self.hexadecimal: - return self._unichr(int(self.value, 16)) - return self._unichr(int(self.value)) + return HTMLEntity._unichr(int(self.value, 16)) + return HTMLEntity._unichr(int(self.value)) diff --git a/mwparserfromhell/nodes/wikilink.py b/mwparserfromhell/nodes/wikilink.py index 6fea468..527e9bb 100644 --- a/mwparserfromhell/nodes/wikilink.py +++ b/mwparserfromhell/nodes/wikilink.py @@ -79,4 +79,7 @@ class Wikilink(Node): @text.setter def text(self, value): - self._text = parse_anything(value) + if value is None: + self._text = None + else: + self._text = parse_anything(value) diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index 8d8ebe2..f2d9c89 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -88,13 +88,7 @@ class Wikicode(StringMixIn): If *obj* is a ``Node``, the function will test whether they are the same object, otherwise it will compare them with ``==``. """ - if isinstance(obj, Node): - if node is obj: - return True - else: - if node == obj: - return True - return False + return (node is obj) if isinstance(obj, Node) else (node == obj) def _contains(self, nodes, obj): """Return ``True`` if *obj* is inside of *nodes*, else ``False``. From 5cf451eb22aa47b119183eb25de141627d0e1ef7 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 20 Apr 2013 18:01:39 -0400 Subject: [PATCH 134/180] Adding a bunch of tests for different nodes. 
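
All of the new test modules follow one convention: expected trees are built by hand with a tiny wrap() helper and compared using the TreeEqualityTestCase asserts. In outline (a sketch of the shared pattern, not an excerpt from any single file)::

    from mwparserfromhell.nodes import Text, Wikilink
    from mwparserfromhell.smart_list import SmartList
    from mwparserfromhell.wikicode import Wikicode

    wrap = lambda L: Wikicode(SmartList(L))

    # A hand-built expected tree for "[[foo|bar]]"; the test methods
    # compare trees like this against real nodes.
    expected = wrap([Wikilink(wrap([Text("foo")]), wrap([Text("bar")]))])
    assert str(expected) == "[[foo|bar]]"
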
--- tests/test_argument.py | 99 ++++++++++++++++++++++++++ tests/test_comment.py | 62 +++++++++++++++++ tests/test_heading.py | 88 ++++++++++++++++++++++++ tests/test_html_entity.py | 172 ++++++++++++++++++++++++++++++++++++++++++++++ tests/test_text.py | 69 +++++++++++++++++++ tests/test_wikilink.py | 99 ++++++++++++++++++++++++++ 6 files changed, 589 insertions(+) create mode 100644 tests/test_argument.py create mode 100644 tests/test_comment.py create mode 100644 tests/test_heading.py create mode 100644 tests/test_html_entity.py create mode 100644 tests/test_text.py create mode 100644 tests/test_wikilink.py diff --git a/tests/test_argument.py b/tests/test_argument.py new file mode 100644 index 0000000..e0524c4 --- /dev/null +++ b/tests/test_argument.py @@ -0,0 +1,99 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import str +from mwparserfromhell.nodes import Argument, Text +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.wikicode import Wikicode + +from ._test_tree_equality import TreeEqualityTestCase + +wrap = lambda L: Wikicode(SmartList(L)) + +class TestArgument(TreeEqualityTestCase): + """Test cases for the Argument node.""" + + def test_unicode(self): + """test Argument.__unicode__()""" + node = Argument(wrap([Text("foobar")])) + self.assertEqual("{{{foobar}}}", str(node)) + node2 = Argument(wrap([Text("foo")]), wrap([Text("bar")])) + self.assertEqual("{{{foo|bar}}}", str(node2)) + + def test_strip(self): + """test Argument.__strip__()""" + node = Argument(wrap([Text("foobar")])) + self.assertIs(None, node.__strip__(True, True)) + self.assertIs(None, node.__strip__(True, False)) + self.assertIs(None, node.__strip__(False, True)) + self.assertIs(None, node.__strip__(False, False)) + + node2 = Argument(wrap([Text("foo")]), wrap([Text("bar")])) + self.assertEqual("bar", node2.__strip__(True, True)) + self.assertEqual("bar", node2.__strip__(True, False)) + self.assertEqual("bar", node2.__strip__(False, True)) + self.assertEqual("bar", node2.__strip__(False, False)) + + def test_showtree(self): + """test Argument.__showtree__()""" + output = [] + getter, marker = object(), object() + get = lambda code: output.append((getter, code)) + mark = lambda: output.append(marker) + node1 = Argument(wrap([Text("foobar")])) + node2 = Argument(wrap([Text("foo")]), wrap([Text("bar")])) + node1.__showtree__(output.append, get, mark) + node2.__showtree__(output.append, get, mark) + valid = [ + "{{{", (getter, node1.name), "}}}", "{{{", (getter, node2.name), + " | ", marker, (getter, node2.default), "}}}"] + self.assertEqual(valid, output) + + def test_name(self): + """test getter/setter for the name attribute""" + name = wrap([Text("foobar")]) + node1 = Argument(name) + node2 = Argument(name, wrap([Text("baz")])) + self.assertIs(name, node1.name) + self.assertIs(name, node2.name) + node1.name = "héhehé" + node2.name = "héhehé" + self.assertWikicodeEqual(wrap([Text("héhehé")]), node1.name) + self.assertWikicodeEqual(wrap([Text("héhehé")]), node2.name) + + def test_default(self): + """test getter/setter for the default attribute""" + default = wrap([Text("baz")]) + node1 = Argument(wrap([Text("foobar")])) + node2 = Argument(wrap([Text("foobar")]), default) + self.assertIs(None, node1.default) + self.assertIs(default, node2.default) + node1.default = "buzz" + node2.default = None + self.assertWikicodeEqual(wrap([Text("buzz")]), node1.default) + self.assertIs(None, node2.default) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_comment.py b/tests/test_comment.py new file mode 100644 index 0000000..980f594 --- /dev/null +++ b/tests/test_comment.py @@ -0,0 +1,62 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies 
or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import str +from mwparserfromhell.nodes import Comment + +from ._test_tree_equality import TreeEqualityTestCase + +class TestComment(TreeEqualityTestCase): + """Test cases for the Comment node.""" + + def test_unicode(self): + """test Comment.__unicode__()""" + node = Comment("foobar") + self.assertEqual("<!--foobar-->", str(node)) + + def test_strip(self): + """test Comment.__strip__()""" + node = Comment("foobar") + self.assertIs(None, node.__strip__(True, True)) + self.assertIs(None, node.__strip__(True, False)) + self.assertIs(None, node.__strip__(False, True)) + self.assertIs(None, node.__strip__(False, False)) + + def test_showtree(self): + """test Comment.__showtree__()""" + output = [] + node = Comment("foobar") + node.__showtree__(output.append, None, None) + self.assertEqual(["<!--foobar-->"], output) + + def test_contents(self): + """test getter/setter for the contents attribute""" + node = Comment("foobar") + self.assertEqual("foobar", node.contents) + node.contents = "barfoo" + self.assertEqual("barfoo", node.contents) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_heading.py b/tests/test_heading.py new file mode 100644 index 0000000..a0e78e5 --- /dev/null +++ b/tests/test_heading.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE.
+ +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import str +from mwparserfromhell.nodes import Heading, Text +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.wikicode import Wikicode + +from ._test_tree_equality import TreeEqualityTestCase + +wrap = lambda L: Wikicode(SmartList(L)) + +class TestHeading(TreeEqualityTestCase): + """Test cases for the Heading node.""" + + def test_unicode(self): + """test Heading.__unicode__()""" + node = Heading(wrap([Text("foobar")]), 2) + self.assertEqual("==foobar==", str(node)) + node2 = Heading(wrap([Text(" zzz ")]), 5) + self.assertEqual("===== zzz =====", str(node2)) + + def test_strip(self): + """test Heading.__strip__()""" + node = Heading(wrap([Text("foobar")]), 3) + self.assertEqual("foobar", node.__strip__(True, True)) + self.assertEqual("foobar", node.__strip__(True, False)) + self.assertEqual("foobar", node.__strip__(False, True)) + self.assertEqual("foobar", node.__strip__(False, False)) + + def test_showtree(self): + """test Heading.__showtree__()""" + output = [] + getter = object() + get = lambda code: output.append((getter, code)) + node1 = Heading(wrap([Text("foobar")]), 3) + node2 = Heading(wrap([Text(" baz ")]), 4) + node1.__showtree__(output.append, get, None) + node2.__showtree__(output.append, get, None) + valid = ["===", (getter, node1.title), "===", + "====", (getter, node2.title), "===="] + self.assertEqual(valid, output) + + def test_title(self): + """test getter/setter for the title attribute""" + title = wrap([Text("foobar")]) + node = Heading(title, 3) + self.assertIs(title, node.title) + node.title = "héhehé" + self.assertWikicodeEqual(wrap([Text("héhehé")]), node.title) + + def test_level(self): + """test getter/setter for the level attribute""" + node = Heading(wrap([Text("foobar")]), 3) + self.assertEqual(3, node.level) + node.level = 5 + self.assertEqual(5, node.level) + node.level = True + self.assertEqual(1, node.level) + self.assertRaises(ValueError, setattr, node, "level", 0) + self.assertRaises(ValueError, setattr, node, "level", 7) + self.assertRaises(ValueError, setattr, node, "level", "abc") + self.assertRaises(ValueError, setattr, node, "level", False) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py new file mode 100644 index 0000000..20c8fc0 --- /dev/null +++ b/tests/test_html_entity.py @@ -0,0 +1,172 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import str +from mwparserfromhell.nodes import HTMLEntity +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.wikicode import Wikicode + +from ._test_tree_equality import TreeEqualityTestCase + +wrap = lambda L: Wikicode(SmartList(L)) + +class TestHTMLEntity(TreeEqualityTestCase): + """Test cases for the HTMLEntity node.""" + + def test_unicode(self): + """test HTMLEntity.__unicode__()""" + node1 = HTMLEntity("nbsp", named=True, hexadecimal=False) + node2 = HTMLEntity("107", named=False, hexadecimal=False) + node3 = HTMLEntity("6b", named=False, hexadecimal=True) + node4 = HTMLEntity("6C", named=False, hexadecimal=True, hex_char="X") + self.assertEqual("&nbsp;", str(node1)) + self.assertEqual("&#107;", str(node2)) + self.assertEqual("&#x6b;", str(node3)) + self.assertEqual("&#X6C;", str(node4)) + + def test_strip(self): + """test HTMLEntity.__strip__()""" + node1 = HTMLEntity("nbsp", named=True, hexadecimal=False) + node2 = HTMLEntity("107", named=False, hexadecimal=False) + node3 = HTMLEntity("e9", named=False, hexadecimal=True) + + self.assertEqual("\xa0", node1.__strip__(True, True)) + self.assertEqual("\xa0", node1.__strip__(True, False)) + self.assertEqual("&nbsp;", node1.__strip__(False, True)) + self.assertEqual("&nbsp;", node1.__strip__(False, False)) + self.assertEqual("k", node2.__strip__(True, True)) + self.assertEqual("k", node2.__strip__(True, False)) + self.assertEqual("&#107;", node2.__strip__(False, True)) + self.assertEqual("&#107;", node2.__strip__(False, False)) + self.assertEqual("é", node3.__strip__(True, True)) + self.assertEqual("é", node3.__strip__(True, False)) + self.assertEqual("&#xe9;", node3.__strip__(False, True)) + self.assertEqual("&#xe9;", node3.__strip__(False, False)) + + def test_showtree(self): + """test HTMLEntity.__showtree__()""" + output = [] + node1 = HTMLEntity("nbsp", named=True, hexadecimal=False) + node2 = HTMLEntity("107", named=False, hexadecimal=False) + node3 = HTMLEntity("e9", named=False, hexadecimal=True) + node1.__showtree__(output.append, None, None) + node2.__showtree__(output.append, None, None) + node3.__showtree__(output.append, None, None) + res = ["&nbsp;", "&#107;", "&#xe9;"] + self.assertEqual(res, output) + + def test_value(self): + """test HTMLEntity.value()""" + node1 = HTMLEntity("nbsp") + node2 = HTMLEntity("107") + node3 = HTMLEntity("e9") + self.assertEquals("nbsp", node1.value) + self.assertEquals("107", node2.value) + self.assertEquals("e9", node3.value) + + node1.value = "ffa4" + node2.value = 72 + node3.value = "Sigma" + self.assertEquals("ffa4", node1.value) + self.assertFalse(node1.named) + self.assertTrue(node1.hexadecimal) + self.assertEquals("72", node2.value) + self.assertFalse(node2.named) + self.assertFalse(node2.hexadecimal) + self.assertEquals("Sigma", node3.value) + self.assertTrue(node3.named) + self.assertFalse(node3.hexadecimal) + + node1.value = "10FFFF" + node2.value = 110000 + node2.value = 1114111 + self.assertRaises(ValueError, setattr, node3, "value", "") + self.assertRaises(ValueError, setattr, node3, "value", "foobar") + self.assertRaises(ValueError, setattr, node3, "value", True) + self.assertRaises(ValueError, setattr, node3, "value", -1) + self.assertRaises(ValueError, setattr,
node1, "value", 110000) + self.assertRaises(ValueError, setattr, node1, "value", "1114112") + + def test_named(self): + """test HTMLEntity.named()""" + node1 = HTMLEntity("nbsp") + node2 = HTMLEntity("107") + node3 = HTMLEntity("e9") + self.assertTrue(node1.named) + self.assertFalse(node2.named) + self.assertFalse(node3.named) + node1.named = 1 + node2.named = 0 + node3.named = 0 + self.assertTrue(node1.named) + self.assertFalse(node2.named) + self.assertFalse(node3.named) + self.assertRaises(ValueError, setattr, node1, "named", False) + self.assertRaises(ValueError, setattr, node2, "named", True) + self.assertRaises(ValueError, setattr, node3, "named", True) + + def test_hexadecimal(self): + """test HTMLEntity.hexadecimal()""" + node1 = HTMLEntity("nbsp") + node2 = HTMLEntity("107") + node3 = HTMLEntity("e9") + self.assertFalse(node1.hexadecimal) + self.assertFalse(node2.hexadecimal) + self.assertTrue(node3.hexadecimal) + node1.hexadecimal = False + node2.hexadecimal = True + node3.hexadecimal = False + self.assertFalse(node1.hexadecimal) + self.assertTrue(node2.hexadecimal) + self.assertFalse(node3.hexadecimal) + self.assertRaises(ValueError, setattr, node1, "hexadecimal", True) + + def test_hex_char(self): + """test HTMLEntity.hex_char()""" + node1 = HTMLEntity("e9") + node2 = HTMLEntity("e9", hex_char="X") + self.assertEquals("x", node1.hex_char) + self.assertEquals("X", node2.hex_char) + node1.hex_char = "X" + node2.hex_char = "x" + self.assertEquals("X", node1.hex_char) + self.assertEquals("x", node2.hex_char) + self.assertRaises(ValueError, setattr, node1, "hex_char", 123) + self.assertRaises(ValueError, setattr, node1, "hex_char", "foobar") + self.assertRaises(ValueError, setattr, node1, "hex_char", True) + + def test_normalize(self): + """test HTMLEntity.normalize()""" + node1 = HTMLEntity("nbsp") + node2 = HTMLEntity("107") + node3 = HTMLEntity("e9") + node4 = HTMLEntity("1f648") + self.assertEquals("\xa0", node1.normalize()) + self.assertEquals("k", node2.normalize()) + self.assertEquals("é", node3.normalize()) + self.assertEquals("\U0001F648", node4.normalize()) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_text.py b/tests/test_text.py new file mode 100644 index 0000000..13636bf --- /dev/null +++ b/tests/test_text.py @@ -0,0 +1,69 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import str +from mwparserfromhell.nodes import Text + +class TestText(unittest.TestCase): + """Test cases for the Text node.""" + + def test_unicode(self): + """test Text.__unicode__()""" + node = Text("foobar") + self.assertEqual("foobar", str(node)) + node2 = Text("fóóbar") + self.assertEqual("fóóbar", str(node2)) + + def test_strip(self): + """test Text.__strip__()""" + node = Text("foobar") + self.assertIs(node, node.__strip__(True, True)) + self.assertIs(node, node.__strip__(True, False)) + self.assertIs(node, node.__strip__(False, True)) + self.assertIs(node, node.__strip__(False, False)) + + def test_showtree(self): + """test Text.__showtree__()""" + output = [] + node1 = Text("foobar") + node2 = Text("fóóbar") + node3 = Text("𐌲𐌿𐍄") + node1.__showtree__(output.append, None, None) + node2.__showtree__(output.append, None, None) + node3.__showtree__(output.append, None, None) + res = ["foobar", r"f\xf3\xf3bar", "\\U00010332\\U0001033f\\U00010344"] + self.assertEqual(res, output) + + def test_value(self): + """test getter/setter for the value attribute""" + node = Text("foobar") + self.assertEqual("foobar", node.value) + self.assertIsInstance(node.value, str) + node.value = "héhéhé" + self.assertEqual("héhéhé", node.value) + self.assertIsInstance(node.value, str) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_wikilink.py b/tests/test_wikilink.py new file mode 100644 index 0000000..422489f --- /dev/null +++ b/tests/test_wikilink.py @@ -0,0 +1,99 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import str +from mwparserfromhell.nodes import Text, Wikilink +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.wikicode import Wikicode + +from ._test_tree_equality import TreeEqualityTestCase + +wrap = lambda L: Wikicode(SmartList(L)) + +class TestWikilink(TreeEqualityTestCase): + """Test cases for the Wikilink node.""" + + def test_unicode(self): + """test Wikilink.__unicode__()""" + node = Wikilink(wrap([Text("foobar")])) + self.assertEqual("[[foobar]]", str(node)) + node2 = Wikilink(wrap([Text("foo")]), wrap([Text("bar")])) + self.assertEqual("[[foo|bar]]", str(node2)) + + def test_strip(self): + """test Wikilink.__strip__()""" + node = Wikilink(wrap([Text("foobar")])) + self.assertEqual("foobar", node.__strip__(True, True)) + self.assertEqual("foobar", node.__strip__(True, False)) + self.assertEqual("foobar", node.__strip__(False, True)) + self.assertEqual("foobar", node.__strip__(False, False)) + + node2 = Wikilink(wrap([Text("foo")]), wrap([Text("bar")])) + self.assertEqual("bar", node2.__strip__(True, True)) + self.assertEqual("bar", node2.__strip__(True, False)) + self.assertEqual("bar", node2.__strip__(False, True)) + self.assertEqual("bar", node2.__strip__(False, False)) + + def test_showtree(self): + """test Wikilink.__showtree__()""" + output = [] + getter, marker = object(), object() + get = lambda code: output.append((getter, code)) + mark = lambda: output.append(marker) + node1 = Wikilink(wrap([Text("foobar")])) + node2 = Wikilink(wrap([Text("foo")]), wrap([Text("bar")])) + node1.__showtree__(output.append, get, mark) + node2.__showtree__(output.append, get, mark) + valid = [ + "[[", (getter, node1.title), "]]", "[[", (getter, node2.title), + " | ", marker, (getter, node2.text), "]]"] + self.assertEqual(valid, output) + + def test_title(self): + """test getter/setter for the title attribute""" + title = wrap([Text("foobar")]) + node1 = Wikilink(title) + node2 = Wikilink(title, wrap([Text("baz")])) + self.assertIs(title, node1.title) + self.assertIs(title, node2.title) + node1.title = "héhehé" + node2.title = "héhehé" + self.assertWikicodeEqual(wrap([Text("héhehé")]), node1.title) + self.assertWikicodeEqual(wrap([Text("héhehé")]), node2.title) + + def test_text(self): + """test getter/setter for the text attribute""" + text = wrap([Text("baz")]) + node1 = Wikilink(wrap([Text("foobar")])) + node2 = Wikilink(wrap([Text("foobar")]), text) + self.assertIs(None, node1.text) + self.assertIs(text, node2.text) + node1.text = "buzz" + node2.text = None + self.assertWikicodeEqual(wrap([Text("buzz")]), node1.text) + self.assertIs(None, node2.text) + +if __name__ == "__main__": + unittest.main(verbosity=2) From 6bf8cfd2adcf536113f3a9ace3901b08540d7ff9 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 20 Apr 2013 18:09:34 -0400 Subject: [PATCH 135/180] Fix tests for Python 3. --- mwparserfromhell/nodes/html_entity.py | 55 ++++++++++++++++++----------------- tests/test_html_entity.py | 28 +++++++++--------- 2 files changed, 43 insertions(+), 40 deletions(-) diff --git a/mwparserfromhell/nodes/html_entity.py b/mwparserfromhell/nodes/html_entity.py index 1bf1c78..b51bd92 100644 --- a/mwparserfromhell/nodes/html_entity.py +++ b/mwparserfromhell/nodes/html_entity.py @@ -23,7 +23,7 @@ from __future__ import unicode_literals from . 
import Node -from ..compat import htmlentities, str +from ..compat import htmlentities, py3k, str __all__ = ["HTMLEntity"] @@ -63,29 +63,31 @@ class HTMLEntity(Node): return self.normalize() return self - @staticmethod - def _unichr(value): - """Implement the builtin unichr() with support for non-BMP code points. + if not py3k: + @staticmethod + def _unichr(value): + """Implement builtin unichr() with support for non-BMP code points. - On wide Python builds, this functions like the normal unichr(). On - narrow builds, this returns the value's corresponding surrogate pair. - """ - try: - return unichr(value) - except ValueError: - # Test whether we're on the wide or narrow Python build. Check the - # length of a non-BMP code point (U+1F64A, SPEAK-NO-EVIL MONKEY): - if len("\U0001F64A") == 2: - # Ensure this is within the range we can encode: - if value > 0x10FFFF: - raise ValueError("unichr() arg not in range(0x110000)") - code = value - 0x10000 - if value < 0: # Invalid code point - raise - lead = 0xD800 + (code >> 10) - trail = 0xDC00 + (code % (1 << 10)) - return unichr(lead) + unichr(trail) - raise + On wide Python builds, this functions like the normal unichr(). On + narrow builds, this returns the value's encoded surrogate pair. + """ + try: + return unichr(value) + except ValueError: + # Test whether we're on the wide or narrow Python build. Check + # the length of a non-BMP code point + # (U+1F64A, SPEAK-NO-EVIL MONKEY): + if len("\U0001F64A") == 2: + # Ensure this is within the range we can encode: + if value > 0x10FFFF: + raise ValueError("unichr() arg not in range(0x110000)") + code = value - 0x10000 + if value < 0: # Invalid code point + raise + lead = 0xD800 + (code >> 10) + trail = 0xDC00 + (code % (1 << 10)) + return unichr(lead) + unichr(trail) + raise @property def value(self): @@ -171,8 +173,9 @@ class HTMLEntity(Node): def normalize(self): """Return the unicode character represented by the HTML entity.""" + chrfunc = chr if py3k else HTMLEntity._unichr if self.named: - return unichr(htmlentities.name2codepoint[self.value]) + return chrfunc(htmlentities.name2codepoint[self.value]) if self.hexadecimal: - return HTMLEntity._unichr(int(self.value, 16)) - return HTMLEntity._unichr(int(self.value)) + return chrfunc(int(self.value, 16)) + return chrfunc(int(self.value)) diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py index 20c8fc0..4bf32e8 100644 --- a/tests/test_html_entity.py +++ b/tests/test_html_entity.py @@ -82,20 +82,20 @@ class TestHTMLEntity(TreeEqualityTestCase): node1 = HTMLEntity("nbsp") node2 = HTMLEntity("107") node3 = HTMLEntity("e9") - self.assertEquals("nbsp", node1.value) - self.assertEquals("107", node2.value) - self.assertEquals("e9", node3.value) + self.assertEqual("nbsp", node1.value) + self.assertEqual("107", node2.value) + self.assertEqual("e9", node3.value) node1.value = "ffa4" node2.value = 72 node3.value = "Sigma" - self.assertEquals("ffa4", node1.value) + self.assertEqual("ffa4", node1.value) self.assertFalse(node1.named) self.assertTrue(node1.hexadecimal) - self.assertEquals("72", node2.value) + self.assertEqual("72", node2.value) self.assertFalse(node2.named) self.assertFalse(node2.hexadecimal) - self.assertEquals("Sigma", node3.value) + self.assertEqual("Sigma", node3.value) self.assertTrue(node3.named) self.assertFalse(node3.hexadecimal) @@ -147,12 +147,12 @@ class TestHTMLEntity(TreeEqualityTestCase): """test HTMLEntity.hex_char()""" node1 = HTMLEntity("e9") node2 = HTMLEntity("e9", hex_char="X") - self.assertEquals("x", 
node1.hex_char) - self.assertEquals("X", node2.hex_char) + self.assertEqual("x", node1.hex_char) + self.assertEqual("X", node2.hex_char) node1.hex_char = "X" node2.hex_char = "x" - self.assertEquals("X", node1.hex_char) - self.assertEquals("x", node2.hex_char) + self.assertEqual("X", node1.hex_char) + self.assertEqual("x", node2.hex_char) self.assertRaises(ValueError, setattr, node1, "hex_char", 123) self.assertRaises(ValueError, setattr, node1, "hex_char", "foobar") self.assertRaises(ValueError, setattr, node1, "hex_char", True) @@ -163,10 +163,10 @@ class TestHTMLEntity(TreeEqualityTestCase): node2 = HTMLEntity("107") node3 = HTMLEntity("e9") node4 = HTMLEntity("1f648") - self.assertEquals("\xa0", node1.normalize()) - self.assertEquals("k", node2.normalize()) - self.assertEquals("é", node3.normalize()) - self.assertEquals("\U0001F648", node4.normalize()) + self.assertEqual("\xa0", node1.normalize()) + self.assertEqual("k", node2.normalize()) + self.assertEqual("é", node3.normalize()) + self.assertEqual("\U0001F648", node4.normalize()) if __name__ == "__main__": unittest.main(verbosity=2) From 6a385b392190d9c4ce89c8cc8777efcb587972f5 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 20 Apr 2013 21:44:44 -0400 Subject: [PATCH 136/180] TestParameter and a fair chunk of TestTemplate. --- mwparserfromhell/nodes/template.py | 9 ++- tests/test_html_entity.py | 10 +-- tests/test_parameter.py | 79 +++++++++++++++++++++ tests/test_template.py | 140 +++++++++++++++++++++++++++++++++++++ 4 files changed, 228 insertions(+), 10 deletions(-) create mode 100644 tests/test_parameter.py create mode 100644 tests/test_template.py diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index e34ba7a..eb7f3a8 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -183,11 +183,10 @@ class Template(Node): def get(self, name): """Get the parameter whose name is *name*. - The returned object is a - :py:class:`~.Parameter` instance. Raises :py:exc:`ValueError` if no - parameter has this name. Since multiple parameters can have the same - name, we'll return the last match, since the last parameter is the only - one read by the MediaWiki parser. + The returned object is a :py:class:`~.Parameter` instance. Raises + :py:exc:`ValueError` if no parameter has this name. Since multiple + parameters can have the same name, we'll return the last match, since + the last parameter is the only one read by the MediaWiki parser. 
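# A standalone sketch of the "last match wins" rule documented above; the
# Param tuple and find_param() are illustrative stand-ins for this aside,
# not the library's actual classes:
from collections import namedtuple

Param = namedtuple("Param", ["name", "value"])

def find_param(params, name):
    """Return the last parameter whose stripped name equals *name*."""
    for param in reversed(params):
        if param.name.strip() == name:
            return param
    raise ValueError(name)

# In {{foo|a|b=c|1=d}}, the first and third parameters are both named "1";
# MediaWiki renders the later one, so the lookup walks the list backwards:
params = [Param("1", "a"), Param("b", "c"), Param("1", "d")]
assert find_param(params, "1").value == "d"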
""" name = name.strip() if isinstance(name, basestring) else str(name) for param in reversed(self.params): diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py index 4bf32e8..a7a9669 100644 --- a/tests/test_html_entity.py +++ b/tests/test_html_entity.py @@ -78,7 +78,7 @@ class TestHTMLEntity(TreeEqualityTestCase): self.assertEqual(res, output) def test_value(self): - """test HTMLEntity.value()""" + """test getter/setter for the value attribute""" node1 = HTMLEntity("nbsp") node2 = HTMLEntity("107") node3 = HTMLEntity("e9") @@ -110,7 +110,7 @@ class TestHTMLEntity(TreeEqualityTestCase): self.assertRaises(ValueError, setattr, node1, "value", "1114112") def test_named(self): - """test HTMLEntity.named()""" + """test getter/setter for the named attribute""" node1 = HTMLEntity("nbsp") node2 = HTMLEntity("107") node3 = HTMLEntity("e9") @@ -128,7 +128,7 @@ class TestHTMLEntity(TreeEqualityTestCase): self.assertRaises(ValueError, setattr, node3, "named", True) def test_hexadecimal(self): - """test HTMLEntity.hexadecimal()""" + """test getter/setter for the hexadecimal attribute""" node1 = HTMLEntity("nbsp") node2 = HTMLEntity("107") node3 = HTMLEntity("e9") @@ -144,7 +144,7 @@ class TestHTMLEntity(TreeEqualityTestCase): self.assertRaises(ValueError, setattr, node1, "hexadecimal", True) def test_hex_char(self): - """test HTMLEntity.hex_char()""" + """test getter/setter for the hex_char attribute""" node1 = HTMLEntity("e9") node2 = HTMLEntity("e9", hex_char="X") self.assertEqual("x", node1.hex_char) @@ -158,7 +158,7 @@ class TestHTMLEntity(TreeEqualityTestCase): self.assertRaises(ValueError, setattr, node1, "hex_char", True) def test_normalize(self): - """test HTMLEntity.normalize()""" + """test getter/setter for the normalize attribute""" node1 = HTMLEntity("nbsp") node2 = HTMLEntity("107") node3 = HTMLEntity("e9") diff --git a/tests/test_parameter.py b/tests/test_parameter.py new file mode 100644 index 0000000..b46ad71 --- /dev/null +++ b/tests/test_parameter.py @@ -0,0 +1,79 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import str +from mwparserfromhell.nodes import Text +from mwparserfromhell.nodes.extras import Parameter +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.wikicode import Wikicode + +from ._test_tree_equality import TreeEqualityTestCase + +wrap = lambda L: Wikicode(SmartList(L)) + +class TestParameter(TreeEqualityTestCase): + """Test cases for the Parameter node extra.""" + + def test_unicode(self): + """test Parameter.__unicode__()""" + node = Parameter(wrap([Text("1")]), wrap([Text("foo")]), showkey=False) + self.assertEqual("foo", str(node)) + node2 = Parameter(wrap([Text("foo")]), wrap([Text("bar")])) + self.assertEqual("foo=bar", str(node2)) + + def test_name(self): + """test getter/setter for the name attribute""" + name1 = wrap([Text("1")]) + name2 = wrap([Text("foobar")]) + node1 = Parameter(name1, wrap([Text("foobar")]), showkey=False) + node2 = Parameter(name2, wrap([Text("baz")])) + self.assertIs(name1, node1.name) + self.assertIs(name2, node2.name) + node1.name = "héhehé" + node2.name = "héhehé" + self.assertWikicodeEqual(wrap([Text("héhehé")]), node1.name) + self.assertWikicodeEqual(wrap([Text("héhehé")]), node2.name) + + def test_value(self): + """test getter/setter for the value attribute""" + value = wrap([Text("bar")]) + node = Parameter(wrap([Text("foo")]), value) + self.assertIs(value, node.value) + node.value = "héhehé" + self.assertWikicodeEqual(wrap([Text("héhehé")]), node.value) + + def test_showkey(self): + """test getter/setter for the showkey attribute""" + node1 = Parameter(wrap([Text("1")]), wrap([Text("foo")]), showkey=False) + node2 = Parameter(wrap([Text("foo")]), wrap([Text("bar")])) + self.assertFalse(node1.showkey) + self.assertTrue(node2.showkey) + node1.showkey = True + node2.showkey = "" + self.assertTrue(node1.showkey) + self.assertFalse(node2.showkey) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_template.py b/tests/test_template.py new file mode 100644 index 0000000..fde7522 --- /dev/null +++ b/tests/test_template.py @@ -0,0 +1,140 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
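# Context for the tests below: a Template serializes as its name plus each
# parameter prefixed with "|".  A rough sketch of that shape, where
# render_template() is illustrative and takes pre-rendered parameters:
def render_template(name, params):
    """Join a template name and rendered parameters into wikicode."""
    return "{{" + name + "".join("|" + p for p in params) + "}}"

assert render_template("foo", []) == "{{foo}}"
assert render_template("foo", ["bar", "abc=def"]) == "{{foo|bar|abc=def}}"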
+ +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import str +from mwparserfromhell.nodes import Template, Text +from mwparserfromhell.nodes.extras import Parameter +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.wikicode import Wikicode +from ._test_tree_equality import TreeEqualityTestCase + +wrap = lambda L: Wikicode(SmartList(L)) +pgens = lambda k, v: Parameter(wrap([Text(k)]), wrap([Text(v)]), True) +pgenh = lambda k, v: Parameter(wrap([Text(k)]), wrap([Text(v)]), False) + +class TestTemplate(TreeEqualityTestCase): + """Test cases for the Template node.""" + + def test_unicode(self): + """test Template.__unicode__()""" + node = Template(wrap([Text("foobar")])) + self.assertEqual("{{foobar}}", str(node)) + node2 = Template(wrap([Text("foo")]), + [pgenh("1", "bar"), pgens("abc", "def")]) + self.assertEqual("{{foo|bar|abc=def}}", str(node2)) + + def test_strip(self): + """test Template.__strip__()""" + node1 = Template(wrap([Text("foobar")])) + node2 = Template(wrap([Text("foo")]), + [pgenh("1", "bar"), pgens("abc", "def")]) + for a in (True, False): + for b in (True, False): + self.assertEqual(None, node1.__strip__(a, b)) + self.assertEqual(None, node2.__strip__(a, b)) + + def test_showtree(self): + """test Template.__showtree__()""" + output = [] + getter, marker = object(), object() + get = lambda code: output.append((getter, code)) + mark = lambda: output.append(marker) + node1 = Template(wrap([Text("foobar")])) + node2 = Template(wrap([Text("foo")]), + [pgenh("1", "bar"), pgens("abc", "def")]) + node1.__showtree__(output.append, get, mark) + node2.__showtree__(output.append, get, mark) + valid = [ + "{{", (getter, node1.name), "}}", "{{", (getter, node2.name), + " | ", marker, (getter, node2.params[0].name), " = ", marker, + (getter, node2.params[0].value), " | ", marker, + (getter, node2.params[1].name), " = ", marker, + (getter, node2.params[1].value), "}}"] + self.assertEqual(valid, output) + + def test_name(self): + """test getter/setter for the name attribute""" + name = wrap([Text("foobar")]) + node1 = Template(name) + node2 = Template(name, [pgenh("1", "bar")]) + self.assertIs(name, node1.name) + self.assertIs(name, node2.name) + node1.name = "asdf" + node2.name = "téstïng" + self.assertWikicodeEqual(wrap([Text("asdf")]), node1.name) + self.assertWikicodeEqual(wrap([Text("téstïng")]), node2.name) + + def test_params(self): + """test getter for the params attribute""" + node1 = Template(wrap([Text("foobar")])) + plist = [pgenh("1", "bar"), pgens("abc", "def")] + node2 = Template(wrap([Text("foo")]), plist) + self.assertEqual([], node1.params) + self.assertIs(plist, node2.params) + + def test_has_param(self): + """test Template.has_param()""" + node1 = Template(wrap([Text("foobar")])) + node2 = Template(wrap([Text("foo")]), + [pgenh("1", "bar"), pgens("abc", "def")]) + node3 = Template(wrap([Text("foo")]), + [pgenh("1", "a"), pgens("b", "c"), pgens("1", "d")]) + node4 = Template(wrap([Text("foo")]), + [pgenh("1", "a"), pgens("b", " ")]) + self.assertFalse(node1.has_param("foobar")) + self.assertTrue(node2.has_param(1)) + self.assertTrue(node2.has_param("abc")) + self.assertFalse(node2.has_param("def")) + self.assertTrue(node3.has_param("1")) + self.assertTrue(node3.has_param("b")) + self.assertFalse(node4.has_param("b")) + self.assertTrue(node3.has_param("b", False)) + self.assertTrue(node4.has_param("b", False)) + + def test_get(self): + """test Template.get()""" + node1 = Template(wrap([Text("foobar")])) + 
node2p1 = pgenh("1", "bar") + node2p2 = pgens("abc", "def") + node2 = Template(wrap([Text("foo")]), [node2p1, node2p2]) + node3p1 = pgens("b", "c") + node3p2 = pgens("1", "d") + node3 = Template(wrap([Text("foo")]), + [pgenh("1", "a"), node3p1, node3p2]) + node4p1 = pgens("b", " ") + node4 = Template(wrap([Text("foo")]), [pgenh("1", "a"), node4p1]) + self.assertRaises(ValueError, node1.get, "foobar") + self.assertIs(node2p1, node2.get(1)) + self.assertIs(node2p2, node2.get("abc")) + self.assertRaises(ValueError, node2.get, "def") + self.assertIs(node3p1, node3.get("b")) + self.assertIs(node3p2, node3.get("1")) + self.assertIs(node4p1, node4.get("b")) + + # add + # remove + +if __name__ == "__main__": + unittest.main(verbosity=2) From debcb6577e80cb5c371513e73bb82f1d2c107ec1 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 21 Apr 2013 20:50:05 -0400 Subject: [PATCH 137/180] Fix recursion issues by giving up at a certain point (closes #16). - Stop parsing new templates if the template depth gets above MAX_DEPTH (40) or if we've already tried to parse over MAX_CYCLES (100,000) templates. - Add two tests to ensure recursion works somewhat correctly. - Fix parsing the string "{{" with the Python tokenizer; add a test. --- mwparserfromhell/parser/tokenizer.c | 18 +++++++++++++----- mwparserfromhell/parser/tokenizer.h | 5 +++++ mwparserfromhell/parser/tokenizer.py | 24 +++++++++++++++++++----- tests/tokenizer/templates.mwtest | 21 +++++++++++++++++++++ 4 files changed, 58 insertions(+), 10 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index d3abb22..875263c 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -109,6 +109,8 @@ Tokenizer_push(Tokenizer* self, int context) return -1; top->next = self->topstack; self->topstack = top; + self->depth++; + self->cycles++; return 0; } @@ -174,6 +176,7 @@ Tokenizer_delete_top_of_stack(Tokenizer* self) Textbuffer_dealloc(top->textbuffer); self->topstack = top->next; free(top); + self->depth--; } /* @@ -1269,10 +1272,14 @@ Tokenizer_parse(Tokenizer* self, int context) Tokenizer_write_text(self, this); } else if (this == next && next == *"{") { - if (Tokenizer_parse_template_or_argument(self)) - return NULL; - if (self->topstack->context & LC_FAIL_NEXT) - self->topstack->context ^= LC_FAIL_NEXT; + if (Tokenizer_CAN_RECURSE(self)) { + if (Tokenizer_parse_template_or_argument(self)) + return NULL; + if (self->topstack->context & LC_FAIL_NEXT) + self->topstack->context ^= LC_FAIL_NEXT; + } + else + Tokenizer_write_text(self, this); } else if (this == *"|" && this_context & LC_TEMPLATE) { if (Tokenizer_handle_template_param(self)) @@ -1295,7 +1302,8 @@ Tokenizer_parse(Tokenizer* self, int context) Tokenizer_write_text(self, this); } else if (this == next && next == *"[") { - if (!(this_context & LC_WIKILINK_TITLE)) { + if (!(this_context & LC_WIKILINK_TITLE) && + Tokenizer_CAN_RECURSE(self)) { if (Tokenizer_parse_wikilink(self)) return NULL; if (self->topstack->context & LC_FAIL_NEXT) diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index 693538c..0730ea8 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -46,6 +46,8 @@ static const char* MARKERS[] = { #define NUM_MARKERS 18 #define TEXTBUFFER_BLOCKSIZE 1024 +#define MAX_DEPTH 40 +#define MAX_CYCLES 100000 #define MAX_ENTITY_SIZE 8 static int route_state = 0; @@ -165,12 +167,15 @@ typedef struct { Py_ssize_t head; /* current position 
in text */
     Py_ssize_t length;              /* length of text */
     int global;                     /* global context */
+    int depth;                      /* stack recursion depth */
+    int cycles;                     /* total number of stack recursions */
 } Tokenizer;
 
 
 /* Macros for accessing Tokenizer data: */
 
 #define Tokenizer_READ(self, delta) (*PyUnicode_AS_UNICODE(Tokenizer_read(self, delta)))
+#define Tokenizer_CAN_RECURSE(self) (self->depth < MAX_DEPTH && self->cycles < MAX_CYCLES)
 
 /* Function prototypes: */
 
diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py
index f995937..24eb9db 100644
--- a/mwparserfromhell/parser/tokenizer.py
+++ b/mwparserfromhell/parser/tokenizer.py
@@ -42,6 +42,8 @@ class Tokenizer(object):
     END = object()
     MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";",
                ":", "/", "-", "!", "\n", END]
+    MAX_DEPTH = 40
+    MAX_CYCLES = 100000
     regex = re.compile(r"([{}\[\]<>|=&#*;:/\-!\n])", flags=re.IGNORECASE)
 
     def __init__(self):
@@ -49,6 +51,8 @@
         self._head = 0
         self._stacks = []
         self._global = 0
+        self._depth = 0
+        self._cycles = 0
 
     @property
     def _stack(self):
@@ -76,6 +80,8 @@
     def _push(self, context=0):
         """Add a new token stack, context, and textbuffer to the list."""
         self._stacks.append([[], context, []])
+        self._depth += 1
+        self._cycles += 1
 
     def _push_textbuffer(self):
         """Push the textbuffer onto the stack as a Text node and clear it."""
@@ -90,6 +96,7 @@
         stack's context with the current stack's.
         """
         self._push_textbuffer()
+        self._depth -= 1
         if keep_context:
             context = self._context
             stack = self._stacks.pop()[0]
@@ -97,6 +104,10 @@
             return stack
         return self._stacks.pop()[0]
 
+    def _can_recurse(self):
+        """Return whether we may recurse (max depth/cycles not yet reached)."""
+        return self._depth < self.MAX_DEPTH and self._cycles < self.MAX_CYCLES
+
     def _fail_route(self):
         """Fail the current tokenization route. 
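# The guard added above, boiled down to a standalone sketch: pushes bump
# both counters, pops release only the depth, so the cycle count caps total
# work even when the nesting itself stays legal.  StackGuard is illustrative
# here, not the real Tokenizer:
MAX_DEPTH = 40
MAX_CYCLES = 100000

class StackGuard(object):
    def __init__(self):
        self.depth = 0   # current nesting level; mirrors _push()/_pop()
        self.cycles = 0  # total pushes ever; never decremented

    def push(self):
        self.depth += 1
        self.cycles += 1

    def pop(self):
        self.depth -= 1

    def can_recurse(self):
        return self.depth < MAX_DEPTH and self.cycles < MAX_CYCLES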
@@ -418,7 +429,7 @@ class Tokenizer(object): else: if this == "\n": self._context |= contexts.FAIL_ON_TEXT - elif this is not self.END or not this.isspace(): + elif this is self.END or not this.isspace(): self._context |= contexts.HAS_TEXT return True else: @@ -479,9 +490,12 @@ class Tokenizer(object): else: self._write_text(this) elif this == next == "{": - self._parse_template_or_argument() - if self._context & contexts.FAIL_NEXT: - self._context ^= contexts.FAIL_NEXT + if self._can_recurse(): + self._parse_template_or_argument() + if self._context & contexts.FAIL_NEXT: + self._context ^= contexts.FAIL_NEXT + else: + self._write_text("{") elif this == "|" and self._context & contexts.TEMPLATE: self._handle_template_param() elif this == "=" and self._context & contexts.TEMPLATE_PARAM_KEY: @@ -496,7 +510,7 @@ class Tokenizer(object): else: self._write_text("}") elif this == next == "[": - if not self._context & contexts.WIKILINK_TITLE: + if not self._context & contexts.WIKILINK_TITLE and self._can_recurse(): self._parse_wikilink() if self._context & contexts.FAIL_NEXT: self._context ^= contexts.FAIL_NEXT diff --git a/tests/tokenizer/templates.mwtest b/tests/tokenizer/templates.mwtest index fa3c0a4..cf41bb3 100644 --- a/tests/tokenizer/templates.mwtest +++ b/tests/tokenizer/templates.mwtest @@ -481,6 +481,13 @@ output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text=" --- +name: incomplete_stub +label: incomplete templates that should fail gracefully: just an opening +input: "{{" +output: [Text(text="{{")] + +--- + name: incomplete_plain label: incomplete templates that should fail gracefully: no close whatsoever input: "{{stuff}} {{foobar" @@ -597,3 +604,17 @@ name: incomplete_nested_template_as_param_value label: incomplete templates that should fail gracefully: a valid nested template as a parameter value input: "{{stuff}} {{foo|bar={{baz}}" output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar="), TemplateOpen(), Text(text="baz"), TemplateClose()] + +--- + +name: recursion_one_hundred_opens +label: test potentially dangerous recursion: one hundred template openings +input: "{{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{" +output: [Text(text="{{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{")] + +--- + +name: recursion_opens_and_closes +label: test potentially dangerous recursion: template openings and closings +input: "{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}" +output: [Text(text="{{|"), TemplateOpen(), TemplateClose(), Text(text="{{|"), TemplateOpen(), TemplateClose(), TemplateOpen(), TemplateParamSeparator(), TemplateOpen(), TemplateClose(), Text(text="{{"), TemplateParamSeparator(), Text(text="{{"), TemplateClose(), Text(text="{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}")] From 786d6192746284ef19c166c4d9eb95050c661b1c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 24 Apr 2013 10:28:17 -0400 Subject: [PATCH 138/180] Drop force_no_field in 
template.remove(); implement test_remove(). - Also add tests for spacing in param names. --- mwparserfromhell/nodes/template.py | 27 ++++++++++-------- tests/test_template.py | 56 ++++++++++++++++++++++++++++++++++---- 2 files changed, 66 insertions(+), 17 deletions(-) diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index eb7f3a8..751c2b1 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -142,9 +142,9 @@ class Template(Node): return False return True - def _remove_without_field(self, param, i, force_no_field): + def _remove_without_field(self, param, i): """Return False if a parameter name should be kept, otherwise True.""" - if not param.showkey and not force_no_field: + if not param.showkey: dependents = [not after.showkey for after in self.params[i+1:]] if any(dependents): return False @@ -266,22 +266,23 @@ class Template(Node): self.params.append(param) return param - def remove(self, name, keep_field=False, force_no_field=False): + def remove(self, name, keep_field=False): """Remove a parameter from the template whose name is *name*. If *keep_field* is ``True``, we will keep the parameter's name, but blank its value. Otherwise, we will remove the parameter completely *unless* other parameters are dependent on it (e.g. removing ``bar`` from ``{{foo|bar|baz}}`` is unsafe because ``{{foo|baz}}`` is not what - we expected, so ``{{foo||baz}}`` will be produced instead), unless - *force_no_field* is also ``True``. If the parameter shows up multiple - times in the template, we will remove all instances of it (and keep - one if *keep_field* is ``True`` - that being the first instance if - none of the instances have dependents, otherwise that instance will be - kept). + we expected, so ``{{foo||baz}}`` will be produced instead). + + If the parameter shows up multiple times in the template, we will + remove all instances of it (and keep one if *keep_field* is ``True`` - + the first instance if none have dependents, otherwise the one with + dependents will be kept). 
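# The dependency rule from the docstring above as a standalone sketch; it
# mirrors _remove_without_field(), but the dict-based parameters are
# illustrative, not the library's objects:
def can_drop_field(params, i):
    """Report if params[i] can vanish without renumbering later positionals."""
    if not params[i]["showkey"]:
        return not any(not p["showkey"] for p in params[i + 1:])
    return True

bar, baz = {"showkey": False}, {"showkey": False}  # i.e. {{foo|bar|baz}}
assert not can_drop_field([bar, baz], 0)  # dropping "bar" would shift "baz"
assert can_drop_field([bar, baz], 1)      # "baz" has nothing after it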
""" name = name.strip() if isinstance(name, basestring) else str(name) removed = False + to_remove =[] for i, param in enumerate(self.params): if param.name.strip() == name: if keep_field: @@ -289,13 +290,15 @@ class Template(Node): self._blank_param_value(param.value) keep_field = False else: - self.params.remove(param) + to_remove.append(param) else: - if self._remove_without_field(param, i, force_no_field): - self.params.remove(param) + if self._remove_without_field(param, i): + to_remove.append(param) else: self._blank_param_value(param.value) if not removed: removed = True if not removed: raise ValueError(name) + for param in to_remove: + self.params.remove(param) diff --git a/tests/test_template.py b/tests/test_template.py index fde7522..ecac917 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -98,7 +98,7 @@ class TestTemplate(TreeEqualityTestCase): """test Template.has_param()""" node1 = Template(wrap([Text("foobar")])) node2 = Template(wrap([Text("foo")]), - [pgenh("1", "bar"), pgens("abc", "def")]) + [pgenh("1", "bar"), pgens("\nabc ", "def")]) node3 = Template(wrap([Text("foo")]), [pgenh("1", "a"), pgens("b", "c"), pgens("1", "d")]) node4 = Template(wrap([Text("foo")]), @@ -108,7 +108,7 @@ class TestTemplate(TreeEqualityTestCase): self.assertTrue(node2.has_param("abc")) self.assertFalse(node2.has_param("def")) self.assertTrue(node3.has_param("1")) - self.assertTrue(node3.has_param("b")) + self.assertTrue(node3.has_param(" b ")) self.assertFalse(node4.has_param("b")) self.assertTrue(node3.has_param("b", False)) self.assertTrue(node4.has_param("b", False)) @@ -123,7 +123,7 @@ class TestTemplate(TreeEqualityTestCase): node3p2 = pgens("1", "d") node3 = Template(wrap([Text("foo")]), [pgenh("1", "a"), node3p1, node3p2]) - node4p1 = pgens("b", " ") + node4p1 = pgens(" b", " ") node4 = Template(wrap([Text("foo")]), [pgenh("1", "a"), node4p1]) self.assertRaises(ValueError, node1.get, "foobar") self.assertIs(node2p1, node2.get(1)) @@ -131,10 +131,56 @@ class TestTemplate(TreeEqualityTestCase): self.assertRaises(ValueError, node2.get, "def") self.assertIs(node3p1, node3.get("b")) self.assertIs(node3p2, node3.get("1")) - self.assertIs(node4p1, node4.get("b")) + self.assertIs(node4p1, node4.get("b ")) # add - # remove + + def test_remove(self): + """test Template.remove()""" + node1 = Template(wrap([Text("foobar")])) + node2 = Template(wrap([Text("foo")]), [pgenh("1", "bar"), + pgens("abc", "def")]) + node3 = Template(wrap([Text("foo")]), [pgenh("1", "bar"), + pgens("abc", "def")]) + node4 = Template(wrap([Text("foo")]), [pgenh("1", "bar"), + pgenh("2", "baz")]) + node5 = Template(wrap([Text("foo")]), [ + pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]) + node6 = Template(wrap([Text("foo")]), [ + pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]) + node7 = Template(wrap([Text("foo")]), [ + pgens("1 ", "a"), pgens(" 1", "b"), pgens("2", "c")]) + node8 = Template(wrap([Text("foo")]), [ + pgens("1 ", "a"), pgens(" 1", "b"), pgens("2", "c")]) + node9 = Template(wrap([Text("foo")]), [ + pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")]) + node10 = Template(wrap([Text("foo")]), [ + pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")]) + + node2.remove("1") + node2.remove("abc") + node3.remove(1, keep_field=True) + node3.remove("abc", keep_field=True) + node4.remove("1", keep_field=False) + node5.remove("a", keep_field=False) + node6.remove("a", keep_field=True) + node7.remove(1, keep_field=True) + node8.remove(1, keep_field=False) + node9.remove(1, keep_field=True) + 
node10.remove(1, keep_field=False) + + self.assertRaises(ValueError, node1.remove, 1) + self.assertRaises(ValueError, node1.remove, "a") + self.assertRaises(ValueError, node2.remove, "1") + self.assertEquals("{{foo}}", node2) + self.assertEquals("{{foo||abc=}}", node3) + self.assertEquals("{{foo||baz}}", node4) + self.assertEquals("{{foo|b=c}}", node5) + self.assertEquals("{{foo| a=|b=c}}", node6) + self.assertEquals("{{foo|1 =|2=c}}", node7) + self.assertEquals("{{foo|2=c}}", node8) + self.assertEquals("{{foo||c}}", node9) + self.assertEquals("{{foo||c}}", node10) if __name__ == "__main__": unittest.main(verbosity=2) From 6af2f3b0639ea515a343cbb36a38daff661f8e62 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 24 Apr 2013 17:46:53 -0400 Subject: [PATCH 139/180] assertEquals -> assertEqual --- tests/test_template.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/test_template.py b/tests/test_template.py index ecac917..0895219 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -172,15 +172,15 @@ class TestTemplate(TreeEqualityTestCase): self.assertRaises(ValueError, node1.remove, 1) self.assertRaises(ValueError, node1.remove, "a") self.assertRaises(ValueError, node2.remove, "1") - self.assertEquals("{{foo}}", node2) - self.assertEquals("{{foo||abc=}}", node3) - self.assertEquals("{{foo||baz}}", node4) - self.assertEquals("{{foo|b=c}}", node5) - self.assertEquals("{{foo| a=|b=c}}", node6) - self.assertEquals("{{foo|1 =|2=c}}", node7) - self.assertEquals("{{foo|2=c}}", node8) - self.assertEquals("{{foo||c}}", node9) - self.assertEquals("{{foo||c}}", node10) + self.assertEqual("{{foo}}", node2) + self.assertEqual("{{foo||abc=}}", node3) + self.assertEqual("{{foo||baz}}", node4) + self.assertEqual("{{foo|b=c}}", node5) + self.assertEqual("{{foo| a=|b=c}}", node6) + self.assertEqual("{{foo|1 =|2=c}}", node7) + self.assertEqual("{{foo|2=c}}", node8) + self.assertEqual("{{foo||c}}", node9) + self.assertEqual("{{foo||c}}", node10) if __name__ == "__main__": unittest.main(verbosity=2) From b46c98b0121d6b9bbb13720a658a3a8b0237932e Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 25 Apr 2013 10:22:20 -0400 Subject: [PATCH 140/180] Clean up template.add(); add a before param but do not implement yet. --- mwparserfromhell/nodes/template.py | 47 +++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index 751c2b1..4b74971 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -194,20 +194,30 @@ class Template(Node): return param raise ValueError(name) - def add(self, name, value, showkey=None, force_nonconformity=False): + def add(self, name, value, showkey=None, before=None, + preserve_spacing=True): """Add a parameter to the template with a given *name* and *value*. *name* and *value* can be anything parasable by - :py:func:`.utils.parse_anything`; pipes (and equal signs, if - appropriate) are automatically escaped from *value* where applicable. + :py:func:`.utils.parse_anything`; pipes and equal signs are + automatically escaped from *value* when appropriate. + If *showkey* is given, this will determine whether or not to show the parameter's name (e.g., ``{{foo|bar}}``'s parameter has a name of ``"1"`` but it is hidden); otherwise, we'll make a safe and intelligent guess. 
If *name* is already a parameter, we'll replace its value while - keeping the same spacing rules unless *force_nonconformity* is - ``True``. We will also try to guess the dominant spacing convention - when adding a new parameter using :py:meth:`_get_spacing_conventions` - unless *force_nonconformity* is ``True``. + keeping the same spacing rules. We will also try to guess the dominant + spacing convention when adding a new parameter using + :py:meth:`_get_spacing_conventions`. + + If *before* is given (either a :py:class:`~.Parameter` object or a + name), then we will place the parameter immediately before this one. + Otherwise, it will be added at the end. This is ignored if the + parameter already exists. + + If *preserve_spacing* is ``False``, we will avoid preserving spacing + conventions when changing the value of an existing parameter or when + adding a new one. """ name, value = parse_anything(name), parse_anything(value) self._surface_escape(value, "|") @@ -220,10 +230,10 @@ class Template(Node): self._surface_escape(value, "=") existing.showkey = showkey nodes = existing.value.nodes - if force_nonconformity: - existing.value = value - else: + if preserve_spacing: existing.value = parse_anything([nodes[0], value, nodes[1]]) + else: + existing.value = value return existing if showkey is None: @@ -245,22 +255,11 @@ class Template(Node): if not showkey: self._surface_escape(value, "=") - if not force_nonconformity: + if preserve_spacing: before_n, after_n = self._get_spacing_conventions(use_names=True) - if before_n and after_n: - name = parse_anything([before_n, name, after_n]) - elif before_n: - name = parse_anything([before_n, name]) - elif after_n: - name = parse_anything([name, after_n]) - before_v, after_v = self._get_spacing_conventions(use_names=False) - if before_v and after_v: - value = parse_anything([before_v, value, after_v]) - elif before_v: - value = parse_anything([before_v, value]) - elif after_v: - value = parse_anything([value, after_v]) + name = parse_anything([before_n, name, after_n]) + value = parse_anything([before_v, value, after_v]) param = Parameter(name, value, showkey) self.params.append(param) From 2ca3b2805e5a346600508e3e622bddad6be38f93 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 26 Apr 2013 10:39:53 -0400 Subject: [PATCH 141/180] Implement 'before' parameter for Template.add() (closes #21) --- mwparserfromhell/nodes/template.py | 11 ++++++++--- tests/test_template.py | 22 +++++++++++++++++++++- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index 4b74971..9d28be4 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -226,9 +226,9 @@ class Template(Node): self.remove(name, keep_field=True) existing = self.get(name) if showkey is not None: - if not showkey: - self._surface_escape(value, "=") existing.showkey = showkey + if not existing.showkey: + self._surface_escape(value, "=") nodes = existing.value.nodes if preserve_spacing: existing.value = parse_anything([nodes[0], value, nodes[1]]) @@ -262,7 +262,12 @@ class Template(Node): value = parse_anything([before_v, value, after_v]) param = Parameter(name, value, showkey) - self.params.append(param) + if before: + if not isinstance(before, Parameter): + before = self.get(before) + self.params.insert(self.params.index(before), param) + else: + self.params.append(param) return param def remove(self, name, keep_field=False): diff --git a/tests/test_template.py 
b/tests/test_template.py index 0895219..a1661f2 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -133,7 +133,27 @@ class TestTemplate(TreeEqualityTestCase): self.assertIs(node3p2, node3.get("1")) self.assertIs(node4p1, node4.get("b ")) - # add + def test_add(self): + """test Template.add()""" + # add new param with showkey to end + # add new param without showkey to end + # add new param to end with an escapable | + # add new param with showkey to end with an escapable = + # add new param without showkey to end with an escapable = + # add new param with showkey to end preserving spacing (x3) + # add new param without showkey to end not preserving spacing + # add new param guessing showkey where key is to be shown + # add new param guessing showkey where key is to be shown with an escapable = + # add new param guessing showkey where key is not to be shown + # add new param guessing showkey where key is not to be shown with an escapable = + # add existing parameter without modifying showkey + # add existing parameter without modifying showkey with an escapable = + # add existing parameter with modifying showkey + # add existing parameter with modifying showkey with an escapable = + # add existing parameter preserving spacing (x3) + # add existing parameter not preserving spacing + # add existing parameter when there are multiple params involved + # add existing parameter when there are multiple params involved; params with dependencies def test_remove(self): """test Template.remove()""" From 81849013bc31b12b1a82a98ff0b4a25ccb597822 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 2 May 2013 11:01:13 -0400 Subject: [PATCH 142/180] Finishing tests for Templates; some fixes. --- mwparserfromhell/nodes/template.py | 21 ++- tests/test_template.py | 262 +++++++++++++++++++++++++++++-------- 2 files changed, 220 insertions(+), 63 deletions(-) diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index 9d28be4..3834d41 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -81,7 +81,7 @@ class Template(Node): in parameter names or values so they are not mistaken for new parameters. """ - replacement = HTMLEntity(value=ord(char)) + replacement = str(HTMLEntity(value=ord(char))) for node in code.filter_text(recursive=False): if char in node: code.replace(node, node.replace(char, replacement)) @@ -107,7 +107,7 @@ class Template(Node): values = tuple(theories.values()) best = max(values) confidence = float(best) / sum(values) - if confidence > 0.75: + if confidence >= 0.75: return tuple(theories.keys())[values.index(best)] def _get_spacing_conventions(self, use_names): @@ -205,15 +205,19 @@ class Template(Node): If *showkey* is given, this will determine whether or not to show the parameter's name (e.g., ``{{foo|bar}}``'s parameter has a name of ``"1"`` but it is hidden); otherwise, we'll make a safe and intelligent - guess. If *name* is already a parameter, we'll replace its value while - keeping the same spacing rules. We will also try to guess the dominant - spacing convention when adding a new parameter using + guess. + + If *name* is already a parameter in the template, we'll replace its + value while keeping the same whitespace around it. We will also try to + guess the dominant spacing convention when adding a new parameter using :py:meth:`_get_spacing_conventions`. If *before* is given (either a :py:class:`~.Parameter` object or a name), then we will place the parameter immediately before this one. 
- Otherwise, it will be added at the end. This is ignored if the - parameter already exists. + Otherwise, it will be added at the end. If *before* is a name and + exists multiple times in the template, we will place it before the last + occurance. If *before* is not in the template, :py:exc:`ValueError` is + raised. The argument is ignored if the new parameter already exists. If *preserve_spacing* is ``False``, we will avoid preserving spacing conventions when changing the value of an existing parameter or when @@ -231,6 +235,9 @@ class Template(Node): self._surface_escape(value, "=") nodes = existing.value.nodes if preserve_spacing: + for i in range(2): # Ignore empty text nodes + if not nodes[i]: + nodes[i] = None existing.value = parse_anything([nodes[0], value, nodes[1]]) else: existing.value = value diff --git a/tests/test_template.py b/tests/test_template.py index a1661f2..3eb88ad 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -24,31 +24,32 @@ from __future__ import unicode_literals import unittest from mwparserfromhell.compat import str -from mwparserfromhell.nodes import Template, Text +from mwparserfromhell.nodes import HTMLEntity, Template, Text from mwparserfromhell.nodes.extras import Parameter from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode from ._test_tree_equality import TreeEqualityTestCase wrap = lambda L: Wikicode(SmartList(L)) -pgens = lambda k, v: Parameter(wrap([Text(k)]), wrap([Text(v)]), True) -pgenh = lambda k, v: Parameter(wrap([Text(k)]), wrap([Text(v)]), False) +wraptext = lambda t: wrap([Text(t)]) +pgens = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=True) +pgenh = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=False) class TestTemplate(TreeEqualityTestCase): """Test cases for the Template node.""" def test_unicode(self): """test Template.__unicode__()""" - node = Template(wrap([Text("foobar")])) + node = Template(wraptext("foobar")) self.assertEqual("{{foobar}}", str(node)) - node2 = Template(wrap([Text("foo")]), + node2 = Template(wraptext("foo"), [pgenh("1", "bar"), pgens("abc", "def")]) self.assertEqual("{{foo|bar|abc=def}}", str(node2)) def test_strip(self): """test Template.__strip__()""" - node1 = Template(wrap([Text("foobar")])) - node2 = Template(wrap([Text("foo")]), + node1 = Template(wraptext("foobar")) + node2 = Template(wraptext("foo"), [pgenh("1", "bar"), pgens("abc", "def")]) for a in (True, False): for b in (True, False): @@ -61,8 +62,8 @@ class TestTemplate(TreeEqualityTestCase): getter, marker = object(), object() get = lambda code: output.append((getter, code)) mark = lambda: output.append(marker) - node1 = Template(wrap([Text("foobar")])) - node2 = Template(wrap([Text("foo")]), + node1 = Template(wraptext("foobar")) + node2 = Template(wraptext("foo"), [pgenh("1", "bar"), pgens("abc", "def")]) node1.__showtree__(output.append, get, mark) node2.__showtree__(output.append, get, mark) @@ -76,33 +77,32 @@ class TestTemplate(TreeEqualityTestCase): def test_name(self): """test getter/setter for the name attribute""" - name = wrap([Text("foobar")]) + name = wraptext("foobar") node1 = Template(name) node2 = Template(name, [pgenh("1", "bar")]) self.assertIs(name, node1.name) self.assertIs(name, node2.name) node1.name = "asdf" node2.name = "téstïng" - self.assertWikicodeEqual(wrap([Text("asdf")]), node1.name) - self.assertWikicodeEqual(wrap([Text("téstïng")]), node2.name) + self.assertWikicodeEqual(wraptext("asdf"), node1.name) + 
self.assertWikicodeEqual(wraptext("téstïng"), node2.name) def test_params(self): """test getter for the params attribute""" - node1 = Template(wrap([Text("foobar")])) + node1 = Template(wraptext("foobar")) plist = [pgenh("1", "bar"), pgens("abc", "def")] - node2 = Template(wrap([Text("foo")]), plist) + node2 = Template(wraptext("foo"), plist) self.assertEqual([], node1.params) self.assertIs(plist, node2.params) def test_has_param(self): """test Template.has_param()""" - node1 = Template(wrap([Text("foobar")])) - node2 = Template(wrap([Text("foo")]), + node1 = Template(wraptext("foobar")) + node2 = Template(wraptext("foo"), [pgenh("1", "bar"), pgens("\nabc ", "def")]) - node3 = Template(wrap([Text("foo")]), + node3 = Template(wraptext("foo"), [pgenh("1", "a"), pgens("b", "c"), pgens("1", "d")]) - node4 = Template(wrap([Text("foo")]), - [pgenh("1", "a"), pgens("b", " ")]) + node4 = Template(wraptext("foo"), [pgenh("1", "a"), pgens("b", " ")]) self.assertFalse(node1.has_param("foobar")) self.assertTrue(node2.has_param(1)) self.assertTrue(node2.has_param("abc")) @@ -115,16 +115,15 @@ class TestTemplate(TreeEqualityTestCase): def test_get(self): """test Template.get()""" - node1 = Template(wrap([Text("foobar")])) + node1 = Template(wraptext("foobar")) node2p1 = pgenh("1", "bar") node2p2 = pgens("abc", "def") - node2 = Template(wrap([Text("foo")]), [node2p1, node2p2]) + node2 = Template(wraptext("foo"), [node2p1, node2p2]) node3p1 = pgens("b", "c") node3p2 = pgens("1", "d") - node3 = Template(wrap([Text("foo")]), - [pgenh("1", "a"), node3p1, node3p2]) + node3 = Template(wraptext("foo"), [pgenh("1", "a"), node3p1, node3p2]) node4p1 = pgens(" b", " ") - node4 = Template(wrap([Text("foo")]), [pgenh("1", "a"), node4p1]) + node4 = Template(wraptext("foo"), [pgenh("1", "a"), node4p1]) self.assertRaises(ValueError, node1.get, "foobar") self.assertIs(node2p1, node2.get(1)) self.assertIs(node2p2, node2.get("abc")) @@ -135,46 +134,197 @@ class TestTemplate(TreeEqualityTestCase): def test_add(self): """test Template.add()""" - # add new param with showkey to end - # add new param without showkey to end - # add new param to end with an escapable | - # add new param with showkey to end with an escapable = - # add new param without showkey to end with an escapable = - # add new param with showkey to end preserving spacing (x3) - # add new param without showkey to end not preserving spacing - # add new param guessing showkey where key is to be shown - # add new param guessing showkey where key is to be shown with an escapable = - # add new param guessing showkey where key is not to be shown - # add new param guessing showkey where key is not to be shown with an escapable = - # add existing parameter without modifying showkey - # add existing parameter without modifying showkey with an escapable = - # add existing parameter with modifying showkey - # add existing parameter with modifying showkey with an escapable = - # add existing parameter preserving spacing (x3) - # add existing parameter not preserving spacing - # add existing parameter when there are multiple params involved - # add existing parameter when there are multiple params involved; params with dependencies + node1 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) + node2 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) + node3 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) + node4 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) + node5 = Template(wraptext("a"), [pgens("b", "c"), + pgens(" d ", 
"e")]) + node6 = Template(wraptext("a"), [pgens("b", "c"), pgens("b", "d"), + pgens("b", "e")]) + node7 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) + node8p = pgenh("1", "d") + node8 = Template(wraptext("a"), [pgens("b", "c"), node8p]) + node9 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) + node10 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "e")]) + node11 = Template(wraptext("a"), [pgens("b", "c")]) + node12 = Template(wraptext("a"), [pgens("b", "c")]) + node13 = Template(wraptext("a"), [pgens("\nb ", " c"), + pgens("\nd ", " e"), + pgens("\nf ", " g")]) + node14 = Template(wraptext("a\n"), [pgens("b ", "c\n"), + pgens("d ", " e"), + pgens("f ", "g\n"), + pgens("h ", " i\n")]) + node15 = Template(wraptext("a"), [pgens("b ", " c\n"), + pgens("\nd ", " e"), + pgens("\nf ", "g ")]) + node16 = Template(wraptext("a"), [pgens("\nb ", " c"), + pgens("\nd ", " e"), + pgens("\nf ", " g")]) + node17 = Template(wraptext("a"), [pgens("\nb ", " c"), + pgens("\nd ", " e"), + pgens("\nf ", " g")]) + node18 = Template(wraptext("a\n"), [pgens("b ", "c\n"), + pgens("d ", " e"), + pgens("f ", "g\n"), + pgens("h ", " i\n")]) + node19 = Template(wraptext("a"), [pgens("b ", " c\n"), + pgens("\nd ", " e"), + pgens("\nf ", "g ")]) + node20 = Template(wraptext("a"), [pgens("\nb ", " c"), + pgens("\nd ", " e"), + pgens("\nf ", " g")]) + node21 = Template(wraptext("a"), [pgenh("1", "b")]) + node22 = Template(wraptext("a"), [pgenh("1", "b")]) + node23 = Template(wraptext("a"), [pgenh("1", "b")]) + node24 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"), + pgenh("3", "d"), pgenh("4", "e")]) + node25 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"), + pgens("4", "d"), pgens("5", "e")]) + node26 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"), + pgens("4", "d"), pgens("5", "e")]) + node27 = Template(wraptext("a"), [pgenh("1", "b")]) + node28 = Template(wraptext("a"), [pgenh("1", "b")]) + node29 = Template(wraptext("a"), [pgens("b", "c")]) + node30 = Template(wraptext("a"), [pgenh("1", "b")]) + node31 = Template(wraptext("a"), [pgenh("1", "b")]) + node32 = Template(wraptext("a"), [pgens("1", "b")]) + node33 = Template(wraptext("a"), [pgens("\nb ", " c"), + pgens("\nd ", " e"), + pgens("\nf ", " g")]) + node34 = Template(wraptext("a\n"), [pgens("b ", "c\n"), + pgens("d ", " e"), + pgens("f ", "g\n"), + pgens("h ", " i\n")]) + node35 = Template(wraptext("a"), [pgens("b ", " c\n"), + pgens("\nd ", " e"), + pgens("\nf ", "g ")]) + node36 = Template(wraptext("a"), [pgens("\nb ", " c "), + pgens("\nd ", " e "), + pgens("\nf ", " g ")]) + node37 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"), + pgens("b", "f"), pgens("b", "h"), + pgens("i", "j")]) + node37 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"), + pgens("b", "f"), pgens("b", "h"), + pgens("i", "j")]) + node38 = Template(wraptext("a"), [pgens("1", "b"), pgens("x", "y"), + pgens("1", "c"), pgens("2", "d")]) + node39 = Template(wraptext("a"), [pgens("1", "b"), pgens("x", "y"), + pgenh("1", "c"), pgenh("2", "d")]) + node40 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"), + pgens("f", "g")]) + + node1.add("e", "f", showkey=True) + node2.add(2, "g", showkey=False) + node3.add("e", "foo|bar", showkey=True) + node4.add("e", "f", showkey=True, before="b") + node5.add("f", "g", showkey=True, before=" d ") + node6.add("f", "g", showkey=True, before="b") + self.assertRaises(ValueError, node7.add, "e", "f", showkey=True, + before="q") + node8.add("e", "f", 
showkey=True, before=node8p) + node9.add("e", "f", showkey=True, before=pgenh("1", "d")) + self.assertRaises(ValueError, node10.add, "e", "f", showkey=True, + before=pgenh("1", "d")) + node11.add("d", "foo=bar", showkey=True) + node12.add("1", "foo=bar", showkey=False) + node13.add("h", "i", showkey=True) + node14.add("j", "k", showkey=True) + node15.add("h", "i", showkey=True) + node16.add("h", "i", showkey=True, preserve_spacing=False) + node17.add("h", "i", showkey=False) + node18.add("j", "k", showkey=False) + node19.add("h", "i", showkey=False) + node20.add("h", "i", showkey=False, preserve_spacing=False) + node21.add("2", "c") + node22.add("3", "c") + node23.add("c", "d") + node24.add("5", "f") + node25.add("3", "f") + node26.add("6", "f") + node27.add("c", "foo=bar") + node28.add("2", "foo=bar") + node29.add("b", "d") + node30.add("1", "foo=bar") + node31.add("1", "foo=bar", showkey=True) + node32.add("1", "foo=bar", showkey=False) + node33.add("d", "foo") + node34.add("f", "foo") + node35.add("f", "foo") + node36.add("d", "foo", preserve_spacing=False) + node37.add("b", "k") + node38.add("1", "e") + node39.add("1", "e") + node40.add("d", "h", before="b") + + self.assertEquals("{{a|b=c|d|e=f}}", node1) + self.assertEquals("{{a|b=c|d|g}}", node2) + self.assertEquals("{{a|b=c|d|e=foo|bar}}", node3) + self.assertIsInstance(node3.params[2].value.get(1), HTMLEntity) + self.assertEquals("{{a|e=f|b=c|d}}", node4) + self.assertEquals("{{a|b=c|f=g| d =e}}", node5) + self.assertEquals("{{a|b=c|b=d|f=g|b=e}}", node6) + self.assertEquals("{{a|b=c|d}}", node7) + self.assertEquals("{{a|b=c|e=f|d}}", node8) + self.assertEquals("{{a|b=c|e=f|d}}", node9) + self.assertEquals("{{a|b=c|e}}", node10) + self.assertEquals("{{a|b=c|d=foo=bar}}", node11) + self.assertEquals("{{a|b=c|foo=bar}}", node12) + self.assertIsInstance(node12.params[1].value.get(1), HTMLEntity) + self.assertEquals("{{a|\nb = c|\nd = e|\nf = g|\nh = i}}", node13) + self.assertEquals("{{a\n|b =c\n|d = e|f =g\n|h = i\n|j =k\n}}", node14) + self.assertEquals("{{a|b = c\n|\nd = e|\nf =g |h =i}}", node15) + self.assertEquals("{{a|\nb = c|\nd = e|\nf = g|h=i}}", node16) + self.assertEquals("{{a|\nb = c|\nd = e|\nf = g| i}}", node17) + self.assertEquals("{{a\n|b =c\n|d = e|f =g\n|h = i\n|k\n}}", node18) + self.assertEquals("{{a|b = c\n|\nd = e|\nf =g |i}}", node19) + self.assertEquals("{{a|\nb = c|\nd = e|\nf = g|i}}", node20) + self.assertEquals("{{a|b|c}}", node21) + self.assertEquals("{{a|b|3=c}}", node22) + self.assertEquals("{{a|b|c=d}}", node23) + self.assertEquals("{{a|b|c|d|e|f}}", node24) + self.assertEquals("{{a|b|c|4=d|5=e|f}}", node25) + self.assertEquals("{{a|b|c|4=d|5=e|6=f}}", node26) + self.assertEquals("{{a|b|c=foo=bar}}", node27) + self.assertEquals("{{a|b|foo=bar}}", node28) + self.assertIsInstance(node28.params[1].value.get(1), HTMLEntity) + self.assertEquals("{{a|b=d}}", node29) + self.assertEquals("{{a|foo=bar}}", node30) + self.assertIsInstance(node30.params[0].value.get(1), HTMLEntity) + self.assertEquals("{{a|1=foo=bar}}", node31) + self.assertEquals("{{a|foo=bar}}", node32) + self.assertIsInstance(node32.params[0].value.get(1), HTMLEntity) + self.assertEquals("{{a|\nb = c|\nd = foo|\nf = g}}", node33) + self.assertEquals("{{a\n|b =c\n|d = e|f =foo\n|h = i\n}}", node34) + self.assertEquals("{{a|b = c\n|\nd = e|\nf =foo }}", node35) + self.assertEquals("{{a|\nb = c |\nd =foo|\nf = g }}", node36) + self.assertEquals("{{a|b=k|d=e|i=j}}", node37) + self.assertEquals("{{a|1=e|x=y|2=d}}", node38) + 
self.assertEquals("{{a|x=y|e|d}}", node39) + self.assertEquals("{{a|b=c|d=h|f=g}}", node40) def test_remove(self): """test Template.remove()""" - node1 = Template(wrap([Text("foobar")])) - node2 = Template(wrap([Text("foo")]), [pgenh("1", "bar"), - pgens("abc", "def")]) - node3 = Template(wrap([Text("foo")]), [pgenh("1", "bar"), - pgens("abc", "def")]) - node4 = Template(wrap([Text("foo")]), [pgenh("1", "bar"), - pgenh("2", "baz")]) - node5 = Template(wrap([Text("foo")]), [ + node1 = Template(wraptext("foobar")) + node2 = Template(wraptext("foo"), [pgenh("1", "bar"), + pgens("abc", "def")]) + node3 = Template(wraptext("foo"), [pgenh("1", "bar"), + pgens("abc", "def")]) + node4 = Template(wraptext("foo"), [pgenh("1", "bar"), + pgenh("2", "baz")]) + node5 = Template(wraptext("foo"), [ pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]) - node6 = Template(wrap([Text("foo")]), [ + node6 = Template(wraptext("foo"), [ pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]) - node7 = Template(wrap([Text("foo")]), [ + node7 = Template(wraptext("foo"), [ pgens("1 ", "a"), pgens(" 1", "b"), pgens("2", "c")]) - node8 = Template(wrap([Text("foo")]), [ + node8 = Template(wraptext("foo"), [ pgens("1 ", "a"), pgens(" 1", "b"), pgens("2", "c")]) - node9 = Template(wrap([Text("foo")]), [ + node9 = Template(wraptext("foo"), [ pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")]) - node10 = Template(wrap([Text("foo")]), [ + node10 = Template(wraptext("foo"), [ pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")]) node2.remove("1") From 1d26c4b312207f956c29c224f34814e486607757 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 2 May 2013 22:40:35 -0400 Subject: [PATCH 143/180] Why do I always make this mistake? --- tests/test_template.py | 80 +++++++++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/tests/test_template.py b/tests/test_template.py index 3eb88ad..b9fd6e8 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -259,51 +259,51 @@ class TestTemplate(TreeEqualityTestCase): node39.add("1", "e") node40.add("d", "h", before="b") - self.assertEquals("{{a|b=c|d|e=f}}", node1) - self.assertEquals("{{a|b=c|d|g}}", node2) - self.assertEquals("{{a|b=c|d|e=foo|bar}}", node3) + self.assertEqual("{{a|b=c|d|e=f}}", node1) + self.assertEqual("{{a|b=c|d|g}}", node2) + self.assertEqual("{{a|b=c|d|e=foo|bar}}", node3) self.assertIsInstance(node3.params[2].value.get(1), HTMLEntity) - self.assertEquals("{{a|e=f|b=c|d}}", node4) - self.assertEquals("{{a|b=c|f=g| d =e}}", node5) - self.assertEquals("{{a|b=c|b=d|f=g|b=e}}", node6) - self.assertEquals("{{a|b=c|d}}", node7) - self.assertEquals("{{a|b=c|e=f|d}}", node8) - self.assertEquals("{{a|b=c|e=f|d}}", node9) - self.assertEquals("{{a|b=c|e}}", node10) - self.assertEquals("{{a|b=c|d=foo=bar}}", node11) - self.assertEquals("{{a|b=c|foo=bar}}", node12) + self.assertEqual("{{a|e=f|b=c|d}}", node4) + self.assertEqual("{{a|b=c|f=g| d =e}}", node5) + self.assertEqual("{{a|b=c|b=d|f=g|b=e}}", node6) + self.assertEqual("{{a|b=c|d}}", node7) + self.assertEqual("{{a|b=c|e=f|d}}", node8) + self.assertEqual("{{a|b=c|e=f|d}}", node9) + self.assertEqual("{{a|b=c|e}}", node10) + self.assertEqual("{{a|b=c|d=foo=bar}}", node11) + self.assertEqual("{{a|b=c|foo=bar}}", node12) self.assertIsInstance(node12.params[1].value.get(1), HTMLEntity) - self.assertEquals("{{a|\nb = c|\nd = e|\nf = g|\nh = i}}", node13) - self.assertEquals("{{a\n|b =c\n|d = e|f =g\n|h = i\n|j =k\n}}", node14) - self.assertEquals("{{a|b = c\n|\nd 
= e|\nf =g |h =i}}", node15) - self.assertEquals("{{a|\nb = c|\nd = e|\nf = g|h=i}}", node16) - self.assertEquals("{{a|\nb = c|\nd = e|\nf = g| i}}", node17) - self.assertEquals("{{a\n|b =c\n|d = e|f =g\n|h = i\n|k\n}}", node18) - self.assertEquals("{{a|b = c\n|\nd = e|\nf =g |i}}", node19) - self.assertEquals("{{a|\nb = c|\nd = e|\nf = g|i}}", node20) - self.assertEquals("{{a|b|c}}", node21) - self.assertEquals("{{a|b|3=c}}", node22) - self.assertEquals("{{a|b|c=d}}", node23) - self.assertEquals("{{a|b|c|d|e|f}}", node24) - self.assertEquals("{{a|b|c|4=d|5=e|f}}", node25) - self.assertEquals("{{a|b|c|4=d|5=e|6=f}}", node26) - self.assertEquals("{{a|b|c=foo=bar}}", node27) - self.assertEquals("{{a|b|foo=bar}}", node28) + self.assertEqual("{{a|\nb = c|\nd = e|\nf = g|\nh = i}}", node13) + self.assertEqual("{{a\n|b =c\n|d = e|f =g\n|h = i\n|j =k\n}}", node14) + self.assertEqual("{{a|b = c\n|\nd = e|\nf =g |h =i}}", node15) + self.assertEqual("{{a|\nb = c|\nd = e|\nf = g|h=i}}", node16) + self.assertEqual("{{a|\nb = c|\nd = e|\nf = g| i}}", node17) + self.assertEqual("{{a\n|b =c\n|d = e|f =g\n|h = i\n|k\n}}", node18) + self.assertEqual("{{a|b = c\n|\nd = e|\nf =g |i}}", node19) + self.assertEqual("{{a|\nb = c|\nd = e|\nf = g|i}}", node20) + self.assertEqual("{{a|b|c}}", node21) + self.assertEqual("{{a|b|3=c}}", node22) + self.assertEqual("{{a|b|c=d}}", node23) + self.assertEqual("{{a|b|c|d|e|f}}", node24) + self.assertEqual("{{a|b|c|4=d|5=e|f}}", node25) + self.assertEqual("{{a|b|c|4=d|5=e|6=f}}", node26) + self.assertEqual("{{a|b|c=foo=bar}}", node27) + self.assertEqual("{{a|b|foo=bar}}", node28) self.assertIsInstance(node28.params[1].value.get(1), HTMLEntity) - self.assertEquals("{{a|b=d}}", node29) - self.assertEquals("{{a|foo=bar}}", node30) + self.assertEqual("{{a|b=d}}", node29) + self.assertEqual("{{a|foo=bar}}", node30) self.assertIsInstance(node30.params[0].value.get(1), HTMLEntity) - self.assertEquals("{{a|1=foo=bar}}", node31) - self.assertEquals("{{a|foo=bar}}", node32) + self.assertEqual("{{a|1=foo=bar}}", node31) + self.assertEqual("{{a|foo=bar}}", node32) self.assertIsInstance(node32.params[0].value.get(1), HTMLEntity) - self.assertEquals("{{a|\nb = c|\nd = foo|\nf = g}}", node33) - self.assertEquals("{{a\n|b =c\n|d = e|f =foo\n|h = i\n}}", node34) - self.assertEquals("{{a|b = c\n|\nd = e|\nf =foo }}", node35) - self.assertEquals("{{a|\nb = c |\nd =foo|\nf = g }}", node36) - self.assertEquals("{{a|b=k|d=e|i=j}}", node37) - self.assertEquals("{{a|1=e|x=y|2=d}}", node38) - self.assertEquals("{{a|x=y|e|d}}", node39) - self.assertEquals("{{a|b=c|d=h|f=g}}", node40) + self.assertEqual("{{a|\nb = c|\nd = foo|\nf = g}}", node33) + self.assertEqual("{{a\n|b =c\n|d = e|f =foo\n|h = i\n}}", node34) + self.assertEqual("{{a|b = c\n|\nd = e|\nf =foo }}", node35) + self.assertEqual("{{a|\nb = c |\nd =foo|\nf = g }}", node36) + self.assertEqual("{{a|b=k|d=e|i=j}}", node37) + self.assertEqual("{{a|1=e|x=y|2=d}}", node38) + self.assertEqual("{{a|x=y|e|d}}", node39) + self.assertEqual("{{a|b=c|d=h|f=g}}", node40) def test_remove(self): """test Template.remove()""" From 3b78541eeb19cf0cb528cd856e8f3048d354fb4e Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 3 May 2013 10:57:30 -0400 Subject: [PATCH 144/180] Clean up indentation. 
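As the previous patch shows, assertEquals is only a deprecated alias of
assertEqual, so the suite passes either way and nothing catches the slip
until review.  A small checker along these lines would flag the alias early
(a sketch only; find_deprecated_asserts() is not project tooling):

import io
import re

def find_deprecated_asserts(path):
    """Yield (line number, text) pairs that use a deprecated alias."""
    pattern = re.compile(r"\bassert(?:Not)?Equals\(")
    with io.open(path, encoding="utf-8") as fp:
        for num, line in enumerate(fp, 1):
            if pattern.search(line):
                yield num, line.rstrip()

Pointed at tests/test_template.py before patch 143, it would have reported
each of the lines that patch corrects.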
--- tests/test_template.py | 66 +++++++++++++++++++++----------------------------- 1 file changed, 27 insertions(+), 39 deletions(-) diff --git a/tests/test_template.py b/tests/test_template.py index b9fd6e8..31ed33b 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -149,32 +149,24 @@ class TestTemplate(TreeEqualityTestCase): node10 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "e")]) node11 = Template(wraptext("a"), [pgens("b", "c")]) node12 = Template(wraptext("a"), [pgens("b", "c")]) - node13 = Template(wraptext("a"), [pgens("\nb ", " c"), - pgens("\nd ", " e"), - pgens("\nf ", " g")]) - node14 = Template(wraptext("a\n"), [pgens("b ", "c\n"), - pgens("d ", " e"), - pgens("f ", "g\n"), - pgens("h ", " i\n")]) - node15 = Template(wraptext("a"), [pgens("b ", " c\n"), - pgens("\nd ", " e"), - pgens("\nf ", "g ")]) - node16 = Template(wraptext("a"), [pgens("\nb ", " c"), - pgens("\nd ", " e"), - pgens("\nf ", " g")]) - node17 = Template(wraptext("a"), [pgens("\nb ", " c"), - pgens("\nd ", " e"), - pgens("\nf ", " g")]) - node18 = Template(wraptext("a\n"), [pgens("b ", "c\n"), - pgens("d ", " e"), - pgens("f ", "g\n"), - pgens("h ", " i\n")]) - node19 = Template(wraptext("a"), [pgens("b ", " c\n"), - pgens("\nd ", " e"), - pgens("\nf ", "g ")]) - node20 = Template(wraptext("a"), [pgens("\nb ", " c"), - pgens("\nd ", " e"), - pgens("\nf ", " g")]) + node13 = Template(wraptext("a"), [ + pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) + node14 = Template(wraptext("a\n"), [ + pgens("b ", "c\n"), pgens("d ", " e"), pgens("f ", "g\n"), + pgens("h ", " i\n")]) + node15 = Template(wraptext("a"), [ + pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")]) + node16 = Template(wraptext("a"), [ + pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) + node17 = Template(wraptext("a"), [ + pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) + node18 = Template(wraptext("a\n"), [ + pgens("b ", "c\n"), pgens("d ", " e"), pgens("f ", "g\n"), + pgens("h ", " i\n")]) + node19 = Template(wraptext("a"), [ + pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")]) + node20 = Template(wraptext("a"), [ + pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) node21 = Template(wraptext("a"), [pgenh("1", "b")]) node22 = Template(wraptext("a"), [pgenh("1", "b")]) node23 = Template(wraptext("a"), [pgenh("1", "b")]) @@ -190,19 +182,15 @@ class TestTemplate(TreeEqualityTestCase): node30 = Template(wraptext("a"), [pgenh("1", "b")]) node31 = Template(wraptext("a"), [pgenh("1", "b")]) node32 = Template(wraptext("a"), [pgens("1", "b")]) - node33 = Template(wraptext("a"), [pgens("\nb ", " c"), - pgens("\nd ", " e"), - pgens("\nf ", " g")]) - node34 = Template(wraptext("a\n"), [pgens("b ", "c\n"), - pgens("d ", " e"), - pgens("f ", "g\n"), - pgens("h ", " i\n")]) - node35 = Template(wraptext("a"), [pgens("b ", " c\n"), - pgens("\nd ", " e"), - pgens("\nf ", "g ")]) - node36 = Template(wraptext("a"), [pgens("\nb ", " c "), - pgens("\nd ", " e "), - pgens("\nf ", " g ")]) + node33 = Template(wraptext("a"), [ + pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) + node34 = Template(wraptext("a\n"), [ + pgens("b ", "c\n"), pgens("d ", " e"), pgens("f ", "g\n"), + pgens("h ", " i\n")]) + node35 = Template(wraptext("a"), [ + pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")]) + node36 = Template(wraptext("a"), [ + pgens("\nb ", " c "), pgens("\nd ", " e "), pgens("\nf ", " g ")]) node37 = Template(wraptext("a"), 
[pgens("b", "c"), pgens("d", "e"), pgens("b", "f"), pgens("b", "h"), pgens("i", "j")]) From 7853e207451a69081573624856025f2a3f750f83 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 3 May 2013 23:43:57 -0400 Subject: [PATCH 145/180] Move wrap() and wraptext() TO _test_tree_equality. --- tests/_test_tree_equality.py | 4 ++++ tests/test_argument.py | 6 +----- tests/test_builder.py | 6 +----- tests/test_heading.py | 6 +----- tests/test_html_entity.py | 6 +----- tests/test_parameter.py | 6 +----- tests/test_template.py | 6 +----- tests/test_wikilink.py | 6 +----- 8 files changed, 11 insertions(+), 35 deletions(-) diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index 758a72e..a12bd68 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -26,8 +26,12 @@ from unittest import TestCase from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, Tag, Template, Text, Wikilink) from mwparserfromhell.nodes.extras import Attribute, Parameter +from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode +wrap = lambda L: Wikicode(SmartList(L)) +wraptext = lambda t: wrap([Text(t)]) + class TreeEqualityTestCase(TestCase): """A base test case with support for comparing the equality of node trees. diff --git a/tests/test_argument.py b/tests/test_argument.py index e0524c4..ae5ae62 100644 --- a/tests/test_argument.py +++ b/tests/test_argument.py @@ -25,12 +25,8 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Argument, Text -from mwparserfromhell.smart_list import SmartList -from mwparserfromhell.wikicode import Wikicode -from ._test_tree_equality import TreeEqualityTestCase - -wrap = lambda L: Wikicode(SmartList(L)) +from ._test_tree_equality import TreeEqualityTestCase, wrap class TestArgument(TreeEqualityTestCase): """Test cases for the Argument node.""" diff --git a/tests/test_builder.py b/tests/test_builder.py index 1e578ed..76917e8 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -28,12 +28,8 @@ from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, from mwparserfromhell.nodes.extras import Attribute, Parameter from mwparserfromhell.parser import tokens from mwparserfromhell.parser.builder import Builder -from mwparserfromhell.smart_list import SmartList -from mwparserfromhell.wikicode import Wikicode -from ._test_tree_equality import TreeEqualityTestCase - -wrap = lambda L: Wikicode(SmartList(L)) +from ._test_tree_equality import TreeEqualityTestCase, wrap class TestBuilder(TreeEqualityTestCase): """Tests for the builder, which turns tokens into Wikicode objects.""" diff --git a/tests/test_heading.py b/tests/test_heading.py index a0e78e5..88603a8 100644 --- a/tests/test_heading.py +++ b/tests/test_heading.py @@ -25,12 +25,8 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Heading, Text -from mwparserfromhell.smart_list import SmartList -from mwparserfromhell.wikicode import Wikicode -from ._test_tree_equality import TreeEqualityTestCase - -wrap = lambda L: Wikicode(SmartList(L)) +from ._test_tree_equality import TreeEqualityTestCase, wrap class TestHeading(TreeEqualityTestCase): """Test cases for the Heading node.""" diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py index a7a9669..b6b4394 100644 --- a/tests/test_html_entity.py +++ b/tests/test_html_entity.py @@ -25,12 +25,8 @@ import unittest from mwparserfromhell.compat import str from 
mwparserfromhell.nodes import HTMLEntity -from mwparserfromhell.smart_list import SmartList -from mwparserfromhell.wikicode import Wikicode -from ._test_tree_equality import TreeEqualityTestCase - -wrap = lambda L: Wikicode(SmartList(L)) +from ._test_tree_equality import TreeEqualityTestCase, wrap class TestHTMLEntity(TreeEqualityTestCase): """Test cases for the HTMLEntity node.""" diff --git a/tests/test_parameter.py b/tests/test_parameter.py index b46ad71..8e85eda 100644 --- a/tests/test_parameter.py +++ b/tests/test_parameter.py @@ -26,12 +26,8 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Text from mwparserfromhell.nodes.extras import Parameter -from mwparserfromhell.smart_list import SmartList -from mwparserfromhell.wikicode import Wikicode -from ._test_tree_equality import TreeEqualityTestCase - -wrap = lambda L: Wikicode(SmartList(L)) +from ._test_tree_equality import TreeEqualityTestCase, wrap class TestParameter(TreeEqualityTestCase): """Test cases for the Parameter node extra.""" diff --git a/tests/test_template.py b/tests/test_template.py index 31ed33b..81b7382 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -26,12 +26,8 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import HTMLEntity, Template, Text from mwparserfromhell.nodes.extras import Parameter -from mwparserfromhell.smart_list import SmartList -from mwparserfromhell.wikicode import Wikicode -from ._test_tree_equality import TreeEqualityTestCase +from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext -wrap = lambda L: Wikicode(SmartList(L)) -wraptext = lambda t: wrap([Text(t)]) pgens = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=True) pgenh = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=False) diff --git a/tests/test_wikilink.py b/tests/test_wikilink.py index 422489f..7c02744 100644 --- a/tests/test_wikilink.py +++ b/tests/test_wikilink.py @@ -25,12 +25,8 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Text, Wikilink -from mwparserfromhell.smart_list import SmartList -from mwparserfromhell.wikicode import Wikicode -from ._test_tree_equality import TreeEqualityTestCase - -wrap = lambda L: Wikicode(SmartList(L)) +from ._test_tree_equality import TreeEqualityTestCase, wrap class TestWikilink(TreeEqualityTestCase): """Test cases for the Wikilink node.""" From eea5c774e342752dae016d79782bf755ca48de53 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 3 May 2013 23:52:10 -0400 Subject: [PATCH 146/180] Clean up some repetitive lines. 
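
The repetition being removed: each __strip__() test asserted the same result
four times, once per combination of the two boolean arguments. Those collapse
into a pair of loops (taken from tests/test_comment.py in the diff below):

    # Before:
    self.assertIs(None, node.__strip__(True, True))
    self.assertIs(None, node.__strip__(True, False))
    self.assertIs(None, node.__strip__(False, True))
    self.assertIs(None, node.__strip__(False, False))

    # After:
    for a in (True, False):
        for b in (True, False):
            self.assertIs(None, node.__strip__(a, b))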
--- tests/test_argument.py | 13 ++++--------- tests/test_comment.py | 7 +++---- tests/test_heading.py | 7 +++---- tests/test_html_entity.py | 20 +++++++------------- tests/test_text.py | 7 +++---- tests/test_wikilink.py | 13 ++++--------- 6 files changed, 24 insertions(+), 43 deletions(-) diff --git a/tests/test_argument.py b/tests/test_argument.py index ae5ae62..3a959b6 100644 --- a/tests/test_argument.py +++ b/tests/test_argument.py @@ -41,16 +41,11 @@ class TestArgument(TreeEqualityTestCase): def test_strip(self): """test Argument.__strip__()""" node = Argument(wrap([Text("foobar")])) - self.assertIs(None, node.__strip__(True, True)) - self.assertIs(None, node.__strip__(True, False)) - self.assertIs(None, node.__strip__(False, True)) - self.assertIs(None, node.__strip__(False, False)) - node2 = Argument(wrap([Text("foo")]), wrap([Text("bar")])) - self.assertEqual("bar", node2.__strip__(True, True)) - self.assertEqual("bar", node2.__strip__(True, False)) - self.assertEqual("bar", node2.__strip__(False, True)) - self.assertEqual("bar", node2.__strip__(False, False)) + for a in (True, False): + for b in (True, False): + self.assertIs(None, node.__strip__(a, b)) + self.assertEqual("bar", node2.__strip__(a, b)) def test_showtree(self): """test Argument.__showtree__()""" diff --git a/tests/test_comment.py b/tests/test_comment.py index 980f594..a7a3c4d 100644 --- a/tests/test_comment.py +++ b/tests/test_comment.py @@ -39,10 +39,9 @@ class TestComment(TreeEqualityTestCase): def test_strip(self): """test Comment.__strip__()""" node = Comment("foobar") - self.assertIs(None, node.__strip__(True, True)) - self.assertIs(None, node.__strip__(True, False)) - self.assertIs(None, node.__strip__(False, True)) - self.assertIs(None, node.__strip__(False, False)) + for a in (True, False): + for b in (True, False): + self.assertIs(None, node.__strip__(a, b)) def test_showtree(self): """test Comment.__showtree__()""" diff --git a/tests/test_heading.py b/tests/test_heading.py index 88603a8..79b0ebf 100644 --- a/tests/test_heading.py +++ b/tests/test_heading.py @@ -41,10 +41,9 @@ class TestHeading(TreeEqualityTestCase): def test_strip(self): """test Heading.__strip__()""" node = Heading(wrap([Text("foobar")]), 3) - self.assertEqual("foobar", node.__strip__(True, True)) - self.assertEqual("foobar", node.__strip__(True, False)) - self.assertEqual("foobar", node.__strip__(False, True)) - self.assertEqual("foobar", node.__strip__(False, False)) + for a in (True, False): + for b in (True, False): + self.assertEqual("foobar", node.__strip__(a, b)) def test_showtree(self): """test Heading.__showtree__()""" diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py index b6b4394..d3d23bf 100644 --- a/tests/test_html_entity.py +++ b/tests/test_html_entity.py @@ -47,19 +47,13 @@ class TestHTMLEntity(TreeEqualityTestCase): node1 = HTMLEntity("nbsp", named=True, hexadecimal=False) node2 = HTMLEntity("107", named=False, hexadecimal=False) node3 = HTMLEntity("e9", named=False, hexadecimal=True) - - self.assertEqual("\xa0", node1.__strip__(True, True)) - self.assertEqual("\xa0", node1.__strip__(True, False)) - self.assertEqual(" ", node1.__strip__(False, True)) - self.assertEqual(" ", node1.__strip__(False, False)) - self.assertEqual("k", node2.__strip__(True, True)) - self.assertEqual("k", node2.__strip__(True, False)) - self.assertEqual("k", node2.__strip__(False, True)) - self.assertEqual("k", node2.__strip__(False, False)) - self.assertEqual("é", node3.__strip__(True, True)) - self.assertEqual("é", 
node3.__strip__(True, False)) - self.assertEqual("é", node3.__strip__(False, True)) - self.assertEqual("é", node3.__strip__(False, False)) + for a in (True, False): + self.assertEqual("\xa0", node1.__strip__(True, a)) + self.assertEqual(" ", node1.__strip__(False, a)) + self.assertEqual("k", node2.__strip__(True, a)) + self.assertEqual("k", node2.__strip__(False, a)) + self.assertEqual("é", node3.__strip__(True, a)) + self.assertEqual("é", node3.__strip__(False, a)) def test_showtree(self): """test HTMLEntity.__showtree__()""" diff --git a/tests/test_text.py b/tests/test_text.py index 13636bf..f3649dd 100644 --- a/tests/test_text.py +++ b/tests/test_text.py @@ -39,10 +39,9 @@ class TestText(unittest.TestCase): def test_strip(self): """test Text.__strip__()""" node = Text("foobar") - self.assertIs(node, node.__strip__(True, True)) - self.assertIs(node, node.__strip__(True, False)) - self.assertIs(node, node.__strip__(False, True)) - self.assertIs(node, node.__strip__(False, False)) + for a in (True, False): + for b in (True, False): + self.assertIs(node, node.__strip__(a, b)) def test_showtree(self): """test Text.__showtree__()""" diff --git a/tests/test_wikilink.py b/tests/test_wikilink.py index 7c02744..09ca5b3 100644 --- a/tests/test_wikilink.py +++ b/tests/test_wikilink.py @@ -41,16 +41,11 @@ class TestWikilink(TreeEqualityTestCase): def test_strip(self): """test Wikilink.__strip__()""" node = Wikilink(wrap([Text("foobar")])) - self.assertEqual("foobar", node.__strip__(True, True)) - self.assertEqual("foobar", node.__strip__(True, False)) - self.assertEqual("foobar", node.__strip__(False, True)) - self.assertEqual("foobar", node.__strip__(False, False)) - node2 = Wikilink(wrap([Text("foo")]), wrap([Text("bar")])) - self.assertEqual("bar", node2.__strip__(True, True)) - self.assertEqual("bar", node2.__strip__(True, False)) - self.assertEqual("bar", node2.__strip__(False, True)) - self.assertEqual("bar", node2.__strip__(False, False)) + for a in (True, False): + for b in (True, False): + self.assertEqual("foobar", node.__strip__(a, b)) + self.assertEqual("bar", node2.__strip__(a, b)) def test_showtree(self): """test Wikilink.__showtree__()""" From 06873ee6edcc88b6ee57d5ad57296655f2fb85c8 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 4 May 2013 15:50:48 -0400 Subject: [PATCH 147/180] Add tests for __iternodes__(); add a getnodes() function. --- tests/_test_tree_equality.py | 9 +++++++++ tests/test_argument.py | 19 ++++++++++++++++++- tests/test_comment.py | 7 +++++++ tests/test_heading.py | 12 +++++++++++- tests/test_html_entity.py | 7 +++++++ tests/test_template.py | 26 +++++++++++++++++++++++++- tests/test_text.py | 7 +++++++ tests/test_wikilink.py | 19 ++++++++++++++++++- 8 files changed, 102 insertions(+), 4 deletions(-) diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index a12bd68..6d9b26a 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -32,6 +32,15 @@ from mwparserfromhell.wikicode import Wikicode wrap = lambda L: Wikicode(SmartList(L)) wraptext = lambda t: wrap([Text(t)]) +def getnodes(code): + """Iterate over all child nodes of a given parent node. + + Imitates Wikicode._get_all_nodes(). + """ + for node in code.nodes: + for context, child in node.__iternodes__(getnodes): + yield child + class TreeEqualityTestCase(TestCase): """A base test case with support for comparing the equality of node trees. 
diff --git a/tests/test_argument.py b/tests/test_argument.py index 3a959b6..a9469d4 100644 --- a/tests/test_argument.py +++ b/tests/test_argument.py @@ -26,7 +26,7 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Argument, Text -from ._test_tree_equality import TreeEqualityTestCase, wrap +from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap class TestArgument(TreeEqualityTestCase): """Test cases for the Argument node.""" @@ -38,6 +38,23 @@ class TestArgument(TreeEqualityTestCase): node2 = Argument(wrap([Text("foo")]), wrap([Text("bar")])) self.assertEqual("{{{foo|bar}}}", str(node2)) + def test_iternodes(self): + """test Argument.__iternodes__()""" + node1n1 = Text("foobar") + node2n1, node2n2, node2n3 = Text("foo"), Text("bar"), Text("baz") + node1 = Argument(wrap([node1n1])) + node2 = Argument(wrap([node2n1]), wrap([node2n2, node2n3])) + gen1 = node1.__iternodes__(getnodes) + gen2 = node2.__iternodes__(getnodes) + self.assertEqual((None, node1), next(gen1)) + self.assertEqual((None, node2), next(gen2)) + self.assertEqual((node1.name, node1n1), next(gen1)) + self.assertEqual((node2.name, node2n1), next(gen2)) + self.assertEqual((node2.default, node2n2), next(gen2)) + self.assertEqual((node2.default, node2n3), next(gen2)) + self.assertRaises(StopIteration, next, gen1) + self.assertRaises(StopIteration, next, gen2) + def test_strip(self): """test Argument.__strip__()""" node = Argument(wrap([Text("foobar")])) diff --git a/tests/test_comment.py b/tests/test_comment.py index a7a3c4d..44225a2 100644 --- a/tests/test_comment.py +++ b/tests/test_comment.py @@ -36,6 +36,13 @@ class TestComment(TreeEqualityTestCase): node = Comment("foobar") self.assertEqual("", str(node)) + def test_iternodes(self): + """test Comment.__iternodes__()""" + node = Comment("foobar") + gen = node.__iternodes__(None) + self.assertEqual((None, node), next(gen)) + self.assertRaises(StopIteration, next, gen) + def test_strip(self): """test Comment.__strip__()""" node = Comment("foobar") diff --git a/tests/test_heading.py b/tests/test_heading.py index 79b0ebf..38f6545 100644 --- a/tests/test_heading.py +++ b/tests/test_heading.py @@ -26,7 +26,7 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Heading, Text -from ._test_tree_equality import TreeEqualityTestCase, wrap +from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap class TestHeading(TreeEqualityTestCase): """Test cases for the Heading node.""" @@ -38,6 +38,16 @@ class TestHeading(TreeEqualityTestCase): node2 = Heading(wrap([Text(" zzz ")]), 5) self.assertEqual("===== zzz =====", str(node2)) + def test_iternodes(self): + """test Heading.__iternodes__()""" + text1, text2 = Text("foo"), Text("bar") + node = Heading(wrap([text1, text2]), 3) + gen = node.__iternodes__(getnodes) + self.assertEqual((None, node), next(gen)) + self.assertEqual((node.title, text1), next(gen)) + self.assertEqual((node.title, text2), next(gen)) + self.assertRaises(StopIteration, next, gen) + def test_strip(self): """test Heading.__strip__()""" node = Heading(wrap([Text("foobar")]), 3) diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py index d3d23bf..d38e5ec 100644 --- a/tests/test_html_entity.py +++ b/tests/test_html_entity.py @@ -42,6 +42,13 @@ class TestHTMLEntity(TreeEqualityTestCase): self.assertEqual("k", str(node3)) self.assertEqual("l", str(node4)) + def test_iternodes(self): + """test HTMLEntity.__iternodes__()""" + node = HTMLEntity("nbsp", 
named=True, hexadecimal=False) + gen = node.__iternodes__(None) + self.assertEqual((None, node), next(gen)) + self.assertRaises(StopIteration, next, gen) + def test_strip(self): """test HTMLEntity.__strip__()""" node1 = HTMLEntity("nbsp", named=True, hexadecimal=False) diff --git a/tests/test_template.py b/tests/test_template.py index 81b7382..28592df 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -26,7 +26,7 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import HTMLEntity, Template, Text from mwparserfromhell.nodes.extras import Parameter -from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext +from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext pgens = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=True) pgenh = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=False) @@ -42,6 +42,30 @@ class TestTemplate(TreeEqualityTestCase): [pgenh("1", "bar"), pgens("abc", "def")]) self.assertEqual("{{foo|bar|abc=def}}", str(node2)) + def test_iternodes(self): + """test Template.__iternodes__()""" + node1n1 = Text("foobar") + node2n1, node2n2, node2n3 = Text("foo"), Text("bar"), Text("abc") + node2n4, node2n5 = Text("def"), Text("ghi") + node2p1 = Parameter(wraptext("1"), wrap([node2n2]), showkey=False) + node2p2 = Parameter(wrap([node2n3]), wrap([node2n4, node2n5]), + showkey=True) + node1 = Template(wrap([node1n1])) + node2 = Template(wrap([node2n1]), [node2p1, node2p2]) + + gen1 = node1.__iternodes__(getnodes) + gen2 = node2.__iternodes__(getnodes) + self.assertEqual((None, node1), next(gen1)) + self.assertEqual((None, node2), next(gen2)) + self.assertEqual((node1.name, node1n1), next(gen1)) + self.assertEqual((node2.name, node2n1), next(gen2)) + self.assertEqual((node2.params[0].value, node2n2), next(gen2)) + self.assertEqual((node2.params[1].name, node2n3), next(gen2)) + self.assertEqual((node2.params[1].value, node2n4), next(gen2)) + self.assertEqual((node2.params[1].value, node2n5), next(gen2)) + self.assertRaises(StopIteration, next, gen1) + self.assertRaises(StopIteration, next, gen2) + def test_strip(self): """test Template.__strip__()""" node1 = Template(wraptext("foobar")) diff --git a/tests/test_text.py b/tests/test_text.py index f3649dd..35ac340 100644 --- a/tests/test_text.py +++ b/tests/test_text.py @@ -36,6 +36,13 @@ class TestText(unittest.TestCase): node2 = Text("fóóbar") self.assertEqual("fóóbar", str(node2)) + def test_iternodes(self): + """test Text.__iternodes__()""" + node = Text("foobar") + gen = node.__iternodes__(None) + self.assertEqual((None, node), next(gen)) + self.assertRaises(StopIteration, next, gen) + def test_strip(self): """test Text.__strip__()""" node = Text("foobar") diff --git a/tests/test_wikilink.py b/tests/test_wikilink.py index 09ca5b3..d4319c1 100644 --- a/tests/test_wikilink.py +++ b/tests/test_wikilink.py @@ -26,7 +26,7 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Text, Wikilink -from ._test_tree_equality import TreeEqualityTestCase, wrap +from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap class TestWikilink(TreeEqualityTestCase): """Test cases for the Wikilink node.""" @@ -38,6 +38,23 @@ class TestWikilink(TreeEqualityTestCase): node2 = Wikilink(wrap([Text("foo")]), wrap([Text("bar")])) self.assertEqual("[[foo|bar]]", str(node2)) + def test_iternodes(self): + """test Wikilink.__iternodes__()""" + node1n1 = Text("foobar") + node2n1, node2n2, node2n3 = Text("foo"), 
Text("bar"), Text("baz") + node1 = Wikilink(wrap([node1n1])) + node2 = Wikilink(wrap([node2n1]), wrap([node2n2, node2n3])) + gen1 = node1.__iternodes__(getnodes) + gen2 = node2.__iternodes__(getnodes) + self.assertEqual((None, node1), next(gen1)) + self.assertEqual((None, node2), next(gen2)) + self.assertEqual((node1.title, node1n1), next(gen1)) + self.assertEqual((node2.title, node2n1), next(gen2)) + self.assertEqual((node2.text, node2n2), next(gen2)) + self.assertEqual((node2.text, node2n3), next(gen2)) + self.assertRaises(StopIteration, next, gen1) + self.assertRaises(StopIteration, next, gen2) + def test_strip(self): """test Wikilink.__strip__()""" node = Wikilink(wrap([Text("foobar")])) From 3fe629f54188a0f5ffde439811aab656a0418f0c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 4 May 2013 16:18:14 -0400 Subject: [PATCH 148/180] Condense usage of wrap([Text("foo")]) to just wraptext("foo"). --- tests/_test_tree_equality.py | 2 +- tests/test_argument.py | 30 ++++++++-------- tests/test_builder.py | 85 +++++++++++++++++++++----------------------- tests/test_heading.py | 20 +++++------ tests/test_parameter.py | 28 +++++++-------- tests/test_parser.py | 21 +++++------ tests/test_utils.py | 27 ++++++-------- tests/test_wikilink.py | 30 ++++++++-------- 8 files changed, 114 insertions(+), 129 deletions(-) diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index 6d9b26a..52130ed 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -30,7 +30,7 @@ from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode wrap = lambda L: Wikicode(SmartList(L)) -wraptext = lambda t: wrap([Text(t)]) +wraptext = lambda *args: wrap([Text(t) for t in args]) def getnodes(code): """Iterate over all child nodes of a given parent node. 
diff --git a/tests/test_argument.py b/tests/test_argument.py index a9469d4..8191804 100644 --- a/tests/test_argument.py +++ b/tests/test_argument.py @@ -26,16 +26,16 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Argument, Text -from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap +from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext class TestArgument(TreeEqualityTestCase): """Test cases for the Argument node.""" def test_unicode(self): """test Argument.__unicode__()""" - node = Argument(wrap([Text("foobar")])) + node = Argument(wraptext("foobar")) self.assertEqual("{{{foobar}}}", str(node)) - node2 = Argument(wrap([Text("foo")]), wrap([Text("bar")])) + node2 = Argument(wraptext("foo"), wraptext("bar")) self.assertEqual("{{{foo|bar}}}", str(node2)) def test_iternodes(self): @@ -57,8 +57,8 @@ class TestArgument(TreeEqualityTestCase): def test_strip(self): """test Argument.__strip__()""" - node = Argument(wrap([Text("foobar")])) - node2 = Argument(wrap([Text("foo")]), wrap([Text("bar")])) + node = Argument(wraptext("foobar")) + node2 = Argument(wraptext("foo"), wraptext("bar")) for a in (True, False): for b in (True, False): self.assertIs(None, node.__strip__(a, b)) @@ -70,8 +70,8 @@ class TestArgument(TreeEqualityTestCase): getter, marker = object(), object() get = lambda code: output.append((getter, code)) mark = lambda: output.append(marker) - node1 = Argument(wrap([Text("foobar")])) - node2 = Argument(wrap([Text("foo")]), wrap([Text("bar")])) + node1 = Argument(wraptext("foobar")) + node2 = Argument(wraptext("foo"), wraptext("bar")) node1.__showtree__(output.append, get, mark) node2.__showtree__(output.append, get, mark) valid = [ @@ -81,26 +81,26 @@ class TestArgument(TreeEqualityTestCase): def test_name(self): """test getter/setter for the name attribute""" - name = wrap([Text("foobar")]) + name = wraptext("foobar") node1 = Argument(name) - node2 = Argument(name, wrap([Text("baz")])) + node2 = Argument(name, wraptext("baz")) self.assertIs(name, node1.name) self.assertIs(name, node2.name) node1.name = "héhehé" node2.name = "héhehé" - self.assertWikicodeEqual(wrap([Text("héhehé")]), node1.name) - self.assertWikicodeEqual(wrap([Text("héhehé")]), node2.name) + self.assertWikicodeEqual(wraptext("héhehé"), node1.name) + self.assertWikicodeEqual(wraptext("héhehé"), node2.name) def test_default(self): """test getter/setter for the default attribute""" - default = wrap([Text("baz")]) - node1 = Argument(wrap([Text("foobar")])) - node2 = Argument(wrap([Text("foobar")]), default) + default = wraptext("baz") + node1 = Argument(wraptext("foobar")) + node2 = Argument(wraptext("foobar"), default) self.assertIs(None, node1.default) self.assertIs(default, node2.default) node1.default = "buzz" node2.default = None - self.assertWikicodeEqual(wrap([Text("buzz")]), node1.default) + self.assertWikicodeEqual(wraptext("buzz"), node1.default) self.assertIs(None, node2.default) if __name__ == "__main__": diff --git a/tests/test_builder.py b/tests/test_builder.py index 76917e8..903d144 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -29,7 +29,7 @@ from mwparserfromhell.nodes.extras import Attribute, Parameter from mwparserfromhell.parser import tokens from mwparserfromhell.parser.builder import Builder -from ._test_tree_equality import TreeEqualityTestCase, wrap +from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext class TestBuilder(TreeEqualityTestCase): """Tests for the builder, which 
turns tokens into Wikicode objects.""" @@ -40,10 +40,10 @@ class TestBuilder(TreeEqualityTestCase): def test_text(self): """tests for building Text nodes""" tests = [ - ([tokens.Text(text="foobar")], wrap([Text("foobar")])), - ([tokens.Text(text="fóóbar")], wrap([Text("fóóbar")])), + ([tokens.Text(text="foobar")], wraptext("foobar")), + ([tokens.Text(text="fóóbar")], wraptext("fóóbar")), ([tokens.Text(text="spam"), tokens.Text(text="eggs")], - wrap([Text("spam"), Text("eggs")])), + wraptext("spam", "eggs")), ] for test, valid in tests: self.assertWikicodeEqual(valid, self.builder.build(test)) @@ -53,25 +53,24 @@ class TestBuilder(TreeEqualityTestCase): tests = [ ([tokens.TemplateOpen(), tokens.Text(text="foobar"), tokens.TemplateClose()], - wrap([Template(wrap([Text("foobar")]))])), + wrap([Template(wraptext("foobar"))])), ([tokens.TemplateOpen(), tokens.Text(text="spam"), tokens.Text(text="eggs"), tokens.TemplateClose()], - wrap([Template(wrap([Text("spam"), Text("eggs")]))])), + wrap([Template(wraptext("spam", "eggs"))])), ([tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateParamSeparator(), tokens.Text(text="bar"), tokens.TemplateClose()], - wrap([Template(wrap([Text("foo")]), params=[ - Parameter(wrap([Text("1")]), wrap([Text("bar")]), - showkey=False)])])), + wrap([Template(wraptext("foo"), params=[ + Parameter(wraptext("1"), wraptext("bar"), showkey=False)])])), ([tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateParamSeparator(), tokens.Text(text="bar"), tokens.TemplateParamEquals(), tokens.Text(text="baz"), tokens.TemplateClose()], - wrap([Template(wrap([Text("foo")]), params=[ - Parameter(wrap([Text("bar")]), wrap([Text("baz")]))])])), + wrap([Template(wraptext("foo"), params=[ + Parameter(wraptext("bar"), wraptext("baz"))])])), ([tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateParamSeparator(), tokens.Text(text="bar"), @@ -82,14 +81,12 @@ class TestBuilder(TreeEqualityTestCase): tokens.TemplateParamEquals(), tokens.Text(text="buff"), tokens.TemplateParamSeparator(), tokens.Text(text="baff"), tokens.TemplateClose()], - wrap([Template(wrap([Text("foo")]), params=[ - Parameter(wrap([Text("bar")]), wrap([Text("baz")])), - Parameter(wrap([Text("1")]), wrap([Text("biz")]), - showkey=False), - Parameter(wrap([Text("2")]), wrap([Text("buzz")]), - showkey=False), - Parameter(wrap([Text("3")]), wrap([Text("buff")])), - Parameter(wrap([Text("3")]), wrap([Text("baff")]), + wrap([Template(wraptext("foo"), params=[ + Parameter(wraptext("bar"), wraptext("baz")), + Parameter(wraptext("1"), wraptext("biz"), showkey=False), + Parameter(wraptext("2"), wraptext("buzz"), showkey=False), + Parameter(wraptext("3"), wraptext("buff")), + Parameter(wraptext("3"), wraptext("baff"), showkey=False)])])), ] for test, valid in tests: @@ -100,23 +97,22 @@ class TestBuilder(TreeEqualityTestCase): tests = [ ([tokens.ArgumentOpen(), tokens.Text(text="foobar"), tokens.ArgumentClose()], - wrap([Argument(wrap([Text("foobar")]))])), + wrap([Argument(wraptext("foobar"))])), ([tokens.ArgumentOpen(), tokens.Text(text="spam"), tokens.Text(text="eggs"), tokens.ArgumentClose()], - wrap([Argument(wrap([Text("spam"), Text("eggs")]))])), + wrap([Argument(wraptext("spam", "eggs"))])), ([tokens.ArgumentOpen(), tokens.Text(text="foo"), tokens.ArgumentSeparator(), tokens.Text(text="bar"), tokens.ArgumentClose()], - wrap([Argument(wrap([Text("foo")]), wrap([Text("bar")]))])), + wrap([Argument(wraptext("foo"), wraptext("bar"))])), ([tokens.ArgumentOpen(), tokens.Text(text="foo"), 
tokens.Text(text="bar"), tokens.ArgumentSeparator(), tokens.Text(text="baz"), tokens.Text(text="biz"), tokens.ArgumentClose()], - wrap([Argument(wrap([Text("foo"), Text("bar")]), - wrap([Text("baz"), Text("biz")]))])), + wrap([Argument(wraptext("foo", "bar"), wraptext("baz", "biz"))])), ] for test, valid in tests: self.assertWikicodeEqual(valid, self.builder.build(test)) @@ -126,23 +122,22 @@ class TestBuilder(TreeEqualityTestCase): tests = [ ([tokens.WikilinkOpen(), tokens.Text(text="foobar"), tokens.WikilinkClose()], - wrap([Wikilink(wrap([Text("foobar")]))])), + wrap([Wikilink(wraptext("foobar"))])), ([tokens.WikilinkOpen(), tokens.Text(text="spam"), tokens.Text(text="eggs"), tokens.WikilinkClose()], - wrap([Wikilink(wrap([Text("spam"), Text("eggs")]))])), + wrap([Wikilink(wraptext("spam", "eggs"))])), ([tokens.WikilinkOpen(), tokens.Text(text="foo"), tokens.WikilinkSeparator(), tokens.Text(text="bar"), tokens.WikilinkClose()], - wrap([Wikilink(wrap([Text("foo")]), wrap([Text("bar")]))])), + wrap([Wikilink(wraptext("foo"), wraptext("bar"))])), ([tokens.WikilinkOpen(), tokens.Text(text="foo"), tokens.Text(text="bar"), tokens.WikilinkSeparator(), tokens.Text(text="baz"), tokens.Text(text="biz"), tokens.WikilinkClose()], - wrap([Wikilink(wrap([Text("foo"), Text("bar")]), - wrap([Text("baz"), Text("biz")]))])), + wrap([Wikilink(wraptext("foo", "bar"), wraptext("baz", "biz"))])), ] for test, valid in tests: self.assertWikicodeEqual(valid, self.builder.build(test)) @@ -172,11 +167,11 @@ class TestBuilder(TreeEqualityTestCase): tests = [ ([tokens.HeadingStart(level=2), tokens.Text(text="foobar"), tokens.HeadingEnd()], - wrap([Heading(wrap([Text("foobar")]), 2)])), + wrap([Heading(wraptext("foobar"), 2)])), ([tokens.HeadingStart(level=4), tokens.Text(text="spam"), tokens.Text(text="eggs"), tokens.HeadingEnd()], - wrap([Heading(wrap([Text("spam"), Text("eggs")]), 4)])), + wrap([Heading(wraptext("spam", "eggs"), 4)])), ] for test, valid in tests: self.assertWikicodeEqual(valid, self.builder.build(test)) @@ -186,11 +181,11 @@ class TestBuilder(TreeEqualityTestCase): tests = [ ([tokens.CommentStart(), tokens.Text(text="foobar"), tokens.CommentEnd()], - wrap([Comment(wrap([Text("foobar")]))])), + wrap([Comment(wraptext("foobar"))])), ([tokens.CommentStart(), tokens.Text(text="spam"), tokens.Text(text="eggs"), tokens.CommentEnd()], - wrap([Comment(wrap([Text("spam"), Text("eggs")]))])), + wrap([Comment(wraptext("spam", "eggs"))])), ] for test, valid in tests: self.assertWikicodeEqual(valid, self.builder.build(test)) @@ -214,10 +209,10 @@ class TestBuilder(TreeEqualityTestCase): tokens.TemplateOpen(), tokens.Text(text="bin"), tokens.TemplateClose(), tokens.TemplateClose()] valid = wrap( - [Template(wrap([Template(wrap([Template(wrap([Template(wrap([Text( - "foo")])), Text("bar")]), params=[Parameter(wrap([Text("baz")]), - wrap([Text("biz")]))]), Text("buzz")])), Text("usr")]), params=[ - Parameter(wrap([Text("1")]), wrap([Template(wrap([Text("bin")]))]), + [Template(wrap([Template(wrap([Template(wrap([Template(wraptext( + "foo")), Text("bar")]), params=[Parameter(wraptext("baz"), + wraptext("biz"))]), Text("buzz")])), Text("usr")]), params=[ + Parameter(wraptext("1"), wrap([Template(wraptext("bin"))]), showkey=False)])]) self.assertWikicodeEqual(valid, self.builder.build(test)) @@ -243,14 +238,14 @@ class TestBuilder(TreeEqualityTestCase): tokens.Text(text="nbsp"), tokens.HTMLEntityEnd(), tokens.TemplateClose()] valid = wrap( - [Template(wrap([Text("a")]), params=[Parameter(wrap([Text("1")]), - 
wrap([Text("b")]), showkey=False), Parameter(wrap([Text("2")]), - wrap([Template(wrap([Text("c")]), params=[Parameter(wrap([Text("1") - ]), wrap([Wikilink(wrap([Text("d")])), Argument(wrap([Text("e")]))] - ), showkey=False)])]), showkey=False)]), Wikilink(wrap([Text("f")] - ), wrap([Argument(wrap([Text("g")])), Comment(wrap([Text("h")]))]) - ), Template(wrap([Text("i")]), params=[Parameter(wrap([Text("j")]), - wrap([HTMLEntity("nbsp", named=True)]))])]) + [Template(wraptext("a"), params=[Parameter(wraptext("1"), wraptext( + "b"), showkey=False), Parameter(wraptext("2"), wrap([Template( + wraptext("c"), params=[Parameter(wraptext("1"), wrap([Wikilink( + wraptext("d")), Argument(wraptext("e"))]), showkey=False)])]), + showkey=False)]), Wikilink(wraptext("f"), wrap([Argument(wraptext( + "g")), Comment(wraptext("h"))])), Template(wraptext("i"), params=[ + Parameter(wraptext("j"), wrap([HTMLEntity("nbsp", + named=True)]))])]) self.assertWikicodeEqual(valid, self.builder.build(test)) if __name__ == "__main__": diff --git a/tests/test_heading.py b/tests/test_heading.py index 38f6545..7a65872 100644 --- a/tests/test_heading.py +++ b/tests/test_heading.py @@ -26,16 +26,16 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Heading, Text -from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap +from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext class TestHeading(TreeEqualityTestCase): """Test cases for the Heading node.""" def test_unicode(self): """test Heading.__unicode__()""" - node = Heading(wrap([Text("foobar")]), 2) + node = Heading(wraptext("foobar"), 2) self.assertEqual("==foobar==", str(node)) - node2 = Heading(wrap([Text(" zzz ")]), 5) + node2 = Heading(wraptext(" zzz "), 5) self.assertEqual("===== zzz =====", str(node2)) def test_iternodes(self): @@ -50,7 +50,7 @@ class TestHeading(TreeEqualityTestCase): def test_strip(self): """test Heading.__strip__()""" - node = Heading(wrap([Text("foobar")]), 3) + node = Heading(wraptext("foobar"), 3) for a in (True, False): for b in (True, False): self.assertEqual("foobar", node.__strip__(a, b)) @@ -60,8 +60,8 @@ class TestHeading(TreeEqualityTestCase): output = [] getter = object() get = lambda code: output.append((getter, code)) - node1 = Heading(wrap([Text("foobar")]), 3) - node2 = Heading(wrap([Text(" baz ")]), 4) + node1 = Heading(wraptext("foobar"), 3) + node2 = Heading(wraptext(" baz "), 4) node1.__showtree__(output.append, get, None) node2.__showtree__(output.append, get, None) valid = ["===", (getter, node1.title), "===", @@ -70,20 +70,18 @@ class TestHeading(TreeEqualityTestCase): def test_title(self): """test getter/setter for the title attribute""" - title = wrap([Text("foobar")]) + title = wraptext("foobar") node = Heading(title, 3) self.assertIs(title, node.title) node.title = "héhehé" - self.assertWikicodeEqual(wrap([Text("héhehé")]), node.title) + self.assertWikicodeEqual(wraptext("héhehé"), node.title) def test_level(self): """test getter/setter for the level attribute""" - node = Heading(wrap([Text("foobar")]), 3) + node = Heading(wraptext("foobar"), 3) self.assertEqual(3, node.level) node.level = 5 self.assertEqual(5, node.level) - node.level = True - self.assertEqual(1, node.level) self.assertRaises(ValueError, setattr, node, "level", 0) self.assertRaises(ValueError, setattr, node, "level", 7) self.assertRaises(ValueError, setattr, node, "level", "abc") diff --git a/tests/test_parameter.py b/tests/test_parameter.py index 8e85eda..4786e12 100644 
--- a/tests/test_parameter.py +++ b/tests/test_parameter.py @@ -27,43 +27,43 @@ from mwparserfromhell.compat import str from mwparserfromhell.nodes import Text from mwparserfromhell.nodes.extras import Parameter -from ._test_tree_equality import TreeEqualityTestCase, wrap +from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext class TestParameter(TreeEqualityTestCase): """Test cases for the Parameter node extra.""" def test_unicode(self): """test Parameter.__unicode__()""" - node = Parameter(wrap([Text("1")]), wrap([Text("foo")]), showkey=False) + node = Parameter(wraptext("1"), wraptext("foo"), showkey=False) self.assertEqual("foo", str(node)) - node2 = Parameter(wrap([Text("foo")]), wrap([Text("bar")])) + node2 = Parameter(wraptext("foo"), wraptext("bar")) self.assertEqual("foo=bar", str(node2)) def test_name(self): """test getter/setter for the name attribute""" - name1 = wrap([Text("1")]) - name2 = wrap([Text("foobar")]) - node1 = Parameter(name1, wrap([Text("foobar")]), showkey=False) - node2 = Parameter(name2, wrap([Text("baz")])) + name1 = wraptext("1") + name2 = wraptext("foobar") + node1 = Parameter(name1, wraptext("foobar"), showkey=False) + node2 = Parameter(name2, wraptext("baz")) self.assertIs(name1, node1.name) self.assertIs(name2, node2.name) node1.name = "héhehé" node2.name = "héhehé" - self.assertWikicodeEqual(wrap([Text("héhehé")]), node1.name) - self.assertWikicodeEqual(wrap([Text("héhehé")]), node2.name) + self.assertWikicodeEqual(wraptext("héhehé"), node1.name) + self.assertWikicodeEqual(wraptext("héhehé"), node2.name) def test_value(self): """test getter/setter for the value attribute""" - value = wrap([Text("bar")]) - node = Parameter(wrap([Text("foo")]), value) + value = wraptext("bar") + node = Parameter(wraptext("foo"), value) self.assertIs(value, node.value) node.value = "héhehé" - self.assertWikicodeEqual(wrap([Text("héhehé")]), node.value) + self.assertWikicodeEqual(wraptext("héhehé"), node.value) def test_showkey(self): """test getter/setter for the showkey attribute""" - node1 = Parameter(wrap([Text("1")]), wrap([Text("foo")]), showkey=False) - node2 = Parameter(wrap([Text("foo")]), wrap([Text("bar")])) + node1 = Parameter(wraptext("1"), wraptext("foo"), showkey=False) + node2 = Parameter(wraptext("foo"), wraptext("bar")) self.assertFalse(node1.showkey) self.assertTrue(node2.showkey) node1.showkey = True diff --git a/tests/test_parser.py b/tests/test_parser.py index 9d2c969..ec5f065 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -26,10 +26,8 @@ import unittest from mwparserfromhell import parser from mwparserfromhell.nodes import Template, Text, Wikilink from mwparserfromhell.nodes.extras import Parameter -from mwparserfromhell.smart_list import SmartList -from mwparserfromhell.wikicode import Wikicode -from ._test_tree_equality import TreeEqualityTestCase +from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext from .compat import range class TestParser(TreeEqualityTestCase): @@ -45,18 +43,17 @@ class TestParser(TreeEqualityTestCase): def test_parsing(self): """integration test for parsing overall""" text = "this is text; {{this|is=a|template={{with|[[links]]|in}}it}}" - wrap = lambda L: Wikicode(SmartList(L)) expected = wrap([ Text("this is text; "), - Template(wrap([Text("this")]), [ - Parameter(wrap([Text("is")]), wrap([Text("a")])), - Parameter(wrap([Text("template")]), wrap([ - Template(wrap([Text("with")]), [ - Parameter(wrap([Text("1")]), - wrap([Wikilink(wrap([Text("links")]))]), + 
Template(wraptext("this"), [ + Parameter(wraptext("is"), wraptext("a")), + Parameter(wraptext("template"), wrap([ + Template(wraptext("with"), [ + Parameter(wraptext("1"), + wrap([Wikilink(wraptext("links"))]), showkey=False), - Parameter(wrap([Text("2")]), - wrap([Text("in")]), showkey=False) + Parameter(wraptext("2"), + wraptext("in"), showkey=False) ]), Text("it") ])) diff --git a/tests/test_utils.py b/tests/test_utils.py index c088530..80a0e5e 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -24,33 +24,28 @@ from __future__ import unicode_literals import unittest from mwparserfromhell.nodes import Template, Text -from mwparserfromhell.smart_list import SmartList from mwparserfromhell.utils import parse_anything -from mwparserfromhell.wikicode import Wikicode -from ._test_tree_equality import TreeEqualityTestCase +from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext class TestUtils(TreeEqualityTestCase): """Tests for the utils module, which provides parse_anything().""" def test_parse_anything_valid(self): """tests for valid input to utils.parse_anything()""" - wrap = lambda L: Wikicode(SmartList(L)) - textify = lambda L: wrap([Text(item) for item in L]) tests = [ - (wrap([Text("foobar")]), textify(["foobar"])), - (Template(wrap([Text("spam")])), - wrap([Template(textify(["spam"]))])), - ("fóóbar", textify(["fóóbar"])), - (b"foob\xc3\xa1r", textify(["foobár"])), - (123, textify(["123"])), - (True, textify(["True"])), + (wraptext("foobar"), wraptext("foobar")), + (Template(wraptext("spam")), wrap([Template(wraptext("spam"))])), + ("fóóbar", wraptext("fóóbar")), + (b"foob\xc3\xa1r", wraptext("foobár")), + (123, wraptext("123")), + (True, wraptext("True")), (None, wrap([])), ([Text("foo"), Text("bar"), Text("baz")], - textify(["foo", "bar", "baz"])), - ([wrap([Text("foo")]), Text("bar"), "baz", 123, 456], - textify(["foo", "bar", "baz", "123", "456"])), - ([[[([[((("foo",),),)], "bar"],)]]], textify(["foo", "bar"])) + wraptext("foo", "bar", "baz")), + ([wraptext("foo"), Text("bar"), "baz", 123, 456], + wraptext("foo", "bar", "baz", "123", "456")), + ([[[([[((("foo",),),)], "bar"],)]]], wraptext("foo", "bar")) ] for test, valid in tests: self.assertWikicodeEqual(valid, parse_anything(test)) diff --git a/tests/test_wikilink.py b/tests/test_wikilink.py index d4319c1..7851032 100644 --- a/tests/test_wikilink.py +++ b/tests/test_wikilink.py @@ -26,16 +26,16 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Text, Wikilink -from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap +from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext class TestWikilink(TreeEqualityTestCase): """Test cases for the Wikilink node.""" def test_unicode(self): """test Wikilink.__unicode__()""" - node = Wikilink(wrap([Text("foobar")])) + node = Wikilink(wraptext("foobar")) self.assertEqual("[[foobar]]", str(node)) - node2 = Wikilink(wrap([Text("foo")]), wrap([Text("bar")])) + node2 = Wikilink(wraptext("foo"), wraptext("bar")) self.assertEqual("[[foo|bar]]", str(node2)) def test_iternodes(self): @@ -57,8 +57,8 @@ class TestWikilink(TreeEqualityTestCase): def test_strip(self): """test Wikilink.__strip__()""" - node = Wikilink(wrap([Text("foobar")])) - node2 = Wikilink(wrap([Text("foo")]), wrap([Text("bar")])) + node = Wikilink(wraptext("foobar")) + node2 = Wikilink(wraptext("foo"), wraptext("bar")) for a in (True, False): for b in (True, False): self.assertEqual("foobar", node.__strip__(a, b)) @@ -70,8 +70,8 @@ 
class TestWikilink(TreeEqualityTestCase): getter, marker = object(), object() get = lambda code: output.append((getter, code)) mark = lambda: output.append(marker) - node1 = Wikilink(wrap([Text("foobar")])) - node2 = Wikilink(wrap([Text("foo")]), wrap([Text("bar")])) + node1 = Wikilink(wraptext("foobar")) + node2 = Wikilink(wraptext("foo"), wraptext("bar")) node1.__showtree__(output.append, get, mark) node2.__showtree__(output.append, get, mark) valid = [ @@ -81,26 +81,26 @@ class TestWikilink(TreeEqualityTestCase): def test_title(self): """test getter/setter for the title attribute""" - title = wrap([Text("foobar")]) + title = wraptext("foobar") node1 = Wikilink(title) - node2 = Wikilink(title, wrap([Text("baz")])) + node2 = Wikilink(title, wraptext("baz")) self.assertIs(title, node1.title) self.assertIs(title, node2.title) node1.title = "héhehé" node2.title = "héhehé" - self.assertWikicodeEqual(wrap([Text("héhehé")]), node1.title) - self.assertWikicodeEqual(wrap([Text("héhehé")]), node2.title) + self.assertWikicodeEqual(wraptext("héhehé"), node1.title) + self.assertWikicodeEqual(wraptext("héhehé"), node2.title) def test_text(self): """test getter/setter for the text attribute""" - text = wrap([Text("baz")]) - node1 = Wikilink(wrap([Text("foobar")])) - node2 = Wikilink(wrap([Text("foobar")]), text) + text = wraptext("baz") + node1 = Wikilink(wraptext("foobar")) + node2 = Wikilink(wraptext("foobar"), text) self.assertIs(None, node1.text) self.assertIs(text, node2.text) node1.text = "buzz" node2.text = None - self.assertWikicodeEqual(wrap([Text("buzz")]), node1.text) + self.assertWikicodeEqual(wraptext("buzz"), node1.text) self.assertIs(None, node2.text) if __name__ == "__main__": From 852c5ff9af1c91aef34b85b10afa59623a117271 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 4 May 2013 21:06:17 -0400 Subject: [PATCH 149/180] Start TestWikicode; make Wikicode.nodes's setter handle more inputs. --- mwparserfromhell/wikicode.py | 2 + tests/test_wikicode.py | 117 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+) create mode 100644 tests/test_wikicode.py diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index f2d9c89..c295fd6 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -162,6 +162,8 @@ class Wikicode(StringMixIn): @nodes.setter def nodes(self, value): + if not isinstance(value, list): + value = parse_anything(value).nodes self._nodes = value def get(self, index): diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py new file mode 100644 index 0000000..421a714 --- /dev/null +++ b/tests/test_wikicode.py @@ -0,0 +1,117 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, + Tag, Template, Text, Wikilink) +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.wikicode import Wikicode +from mwparserfromhell import parse +from mwparserfromhell.compat import str + +from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext + +class TestWikicode(TreeEqualityTestCase): + """Tests for the Wikicode class, which manages a list of nodes.""" + + def test_unicode(self): + """test Wikicode.__unicode__()""" + code1 = parse("foobar") + code2 = parse("Have a {{template}} and a [[page|link]]") + self.assertEqual("foobar", str(code1)) + self.assertEqual("Have a {{template}} and a [[page|link]]", str(code2)) + + def test_nodes(self): + """test getter/setter for the nodes attribute""" + code = parse("Have a {{template}}") + self.assertEqual(["Have a ", "{{template}}"], code.nodes) + L1 = SmartList([Text("foobar"), Template(wraptext("abc"))]) + L2 = [Text("barfoo"), Template(wraptext("cba"))] + L3 = "abc{{def}}" + code.nodes = L1 + self.assertIs(L1, code.nodes) + code.nodes = L2 + self.assertIs(L2, code.nodes) + code.nodes = L3 + self.assertEqual(["abc", "{{def}}"], code.nodes) + self.assertRaises(ValueError, setattr, code, "nodes", object) + + def test_get(self): + """test Wikicode.get()""" + code = parse("Have a {{template}} and a [[page|link]]") + self.assertIs(code.nodes[0], code.get(0)) + self.assertIs(code.nodes[2], code.get(2)) + self.assertRaises(IndexError, code.get, 4) + + def test_set(self): + """test Wikicode.set()""" + pass + + def test_index(self): + """test Wikicode.index()""" + pass + + def test_insert(self): + """test Wikicode.insert()""" + pass + + def test_insert_before(self): + """test Wikicode.insert_before()""" + pass + + def test_insert_after(self): + """test Wikicode.insert_after()""" + pass + + def test_replace(self): + """test Wikicode.replace()""" + pass + + def test_append(self): + """test Wikicode.append()""" + pass + + def test_remove(self): + """test Wikicode.remove()""" + pass + + def test_filter_family(self): + """test the Wikicode.i?filter() family of functions""" + pass + + def test_get_sections(self): + """test Wikicode.get_sections()""" + pass + + def test_strip_code(self): + """test Wikicode.strip_code()""" + pass + + def test_get_tree(self): + """test Wikicode.get_tree()""" + pass + + +if __name__ == "__main__": + unittest.main(verbosity=2) From ee99e6eceb5e77dae0b786422a48893e4255a76c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 6 May 2013 22:29:02 -0400 Subject: [PATCH 150/180] Fix a bug in Wikicode.set(); implement test_set() and test_index() --- mwparserfromhell/wikicode.py | 3 ++- tests/test_wikicode.py | 27 +++++++++++++++++++++++++-- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index c295fd6..e9bd133 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -184,9 +184,10 @@ class Wikicode(StringMixIn): raise ValueError("Cannot coerce multiple nodes into one index") if index >= len(self.nodes) or -1 * index > len(self.nodes): raise IndexError("List assignment index out of range") - 
self.nodes.pop(index) if nodes: self.nodes[index] = nodes[0] + else: + self.nodes.pop(index) def index(self, obj, recursive=False): """Return the index of *obj* in the list of nodes. diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index 421a714..485ab99 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -66,11 +66,34 @@ class TestWikicode(TreeEqualityTestCase): def test_set(self): """test Wikicode.set()""" - pass + code = parse("Have a {{template}} and a [[page|link]]") + code.set(1, "{{{argument}}}") + self.assertEqual("Have a {{{argument}}} and a [[page|link]]", code) + self.assertIsInstance(code.get(1), Argument) + code.set(2, None) + self.assertEqual("Have a {{{argument}}}[[page|link]]", code) + code.set(-3, "This is an ") + self.assertEqual("This is an {{{argument}}}[[page|link]]", code) + self.assertRaises(ValueError, code.set, 1, "foo {{bar}}") + self.assertRaises(IndexError, code.set, 3, "{{baz}}") + self.assertRaises(IndexError, code.set, -4, "{{baz}}") def test_index(self): """test Wikicode.index()""" - pass + code = parse("Have a {{template}} and a [[page|link]]") + self.assertEqual(0, code.index("Have a ")) + self.assertEqual(3, code.index("[[page|link]]")) + self.assertEqual(1, code.index(code.get(1))) + self.assertRaises(ValueError, code.index, "foo") + + code = parse("{{foo}}{{bar|{{baz}}}}") + self.assertEqual(1, code.index("{{bar|{{baz}}}}")) + self.assertEqual(1, code.index("{{baz}}", recursive=True)) + self.assertEqual(1, code.index(code.get(1).get(1).value, + recursive=True)) + self.assertRaises(ValueError, code.index, "{{baz}}", recursive=False) + self.assertRaises(ValueError, code.index, + code.get(1).get(1).value, recursive=False) def test_insert(self): """test Wikicode.insert()""" From 3095a4203f7e7ca680da861d8b243a2284acfc93 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 8 May 2013 11:03:04 -0400 Subject: [PATCH 151/180] Finish tests for Wikicode's list-like methods; fix a bug. --- mwparserfromhell/wikicode.py | 2 +- tests/test_wikicode.py | 101 ++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 96 insertions(+), 7 deletions(-) diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index e9bd133..b704590 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -68,7 +68,7 @@ class Wikicode(StringMixIn): Raises ``ValueError`` if *obj* is not within *node*. 
""" for context, child in node.__iternodes__(self._get_all_nodes): - if child is obj: + if self._is_equivalent(obj, child): return context raise ValueError(obj) diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index 485ab99..179d588 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -97,27 +97,116 @@ class TestWikicode(TreeEqualityTestCase): def test_insert(self): """test Wikicode.insert()""" - pass + code = parse("Have a {{template}} and a [[page|link]]") + code.insert(1, "{{{argument}}}") + self.assertEqual( + "Have a {{{argument}}}{{template}} and a [[page|link]]", code) + self.assertIsInstance(code.get(1), Argument) + code.insert(2, None) + self.assertEqual( + "Have a {{{argument}}}{{template}} and a [[page|link]]", code) + code.insert(-3, Text("foo")) + self.assertEqual( + "Have a {{{argument}}}foo{{template}} and a [[page|link]]", code) + + code2 = parse("{{foo}}{{bar}}{{baz}}") + code2.insert(1, "abc{{def}}ghi[[jk]]") + self.assertEqual("{{foo}}abc{{def}}ghi[[jk]]{{bar}}{{baz}}", code2) + self.assertEqual(["{{foo}}", "abc", "{{def}}", "ghi", "[[jk]]", + "{{bar}}", "{{baz}}"], code2.nodes) + + code3 = parse("{{foo}}bar") + code3.insert(1000, "[[baz]]") + code3.insert(-1000, "derp") + self.assertEqual("derp{{foo}}bar[[baz]]", code3) def test_insert_before(self): """test Wikicode.insert_before()""" - pass + code = parse("{{a}}{{b}}{{c}}{{d}}") + code.insert_before("{{b}}", "x", recursive=True) + code.insert_before("{{d}}", "[[y]]", recursive=False) + self.assertEqual("{{a}}x{{b}}{{c}}[[y]]{{d}}", code) + code.insert_before(code.get(2), "z") + self.assertEqual("{{a}}xz{{b}}{{c}}[[y]]{{d}}", code) + self.assertRaises(ValueError, code.insert_before, "{{r}}", "n", + recursive=True) + self.assertRaises(ValueError, code.insert_before, "{{r}}", "n", + recursive=False) + + code2 = parse("{{a|{{b}}|{{c|d={{f}}}}}}") + code2.insert_before(code2.get(0).params[0].value.get(0), "x", + recursive=True) + code2.insert_before("{{f}}", "y", recursive=True) + self.assertEqual("{{a|x{{b}}|{{c|d=y{{f}}}}}}", code2) + self.assertRaises(ValueError, code2.insert_before, "{{f}}", "y", + recursive=False) def test_insert_after(self): """test Wikicode.insert_after()""" - pass + code = parse("{{a}}{{b}}{{c}}{{d}}") + code.insert_after("{{b}}", "x", recursive=True) + code.insert_after("{{d}}", "[[y]]", recursive=False) + self.assertEqual("{{a}}{{b}}x{{c}}{{d}}[[y]]", code) + code.insert_after(code.get(2), "z") + self.assertEqual("{{a}}{{b}}xz{{c}}{{d}}[[y]]", code) + self.assertRaises(ValueError, code.insert_after, "{{r}}", "n", + recursive=True) + self.assertRaises(ValueError, code.insert_after, "{{r}}", "n", + recursive=False) + + code2 = parse("{{a|{{b}}|{{c|d={{f}}}}}}") + code2.insert_after(code2.get(0).params[0].value.get(0), "x", + recursive=True) + code2.insert_after("{{f}}", "y", recursive=True) + self.assertEqual("{{a|{{b}}x|{{c|d={{f}}y}}}}", code2) + self.assertRaises(ValueError, code2.insert_after, "{{f}}", "y", + recursive=False) def test_replace(self): """test Wikicode.replace()""" - pass + code = parse("{{a}}{{b}}{{c}}{{d}}") + code.replace("{{b}}", "x", recursive=True) + code.replace("{{d}}", "[[y]]", recursive=False) + self.assertEqual("{{a}}x{{c}}[[y]]", code) + code.replace(code.get(1), "z") + self.assertEqual("{{a}}z{{c}}[[y]]", code) + self.assertRaises(ValueError, code.replace, "{{r}}", "n", + recursive=True) + self.assertRaises(ValueError, code.replace, "{{r}}", "n", + recursive=False) + + code2 = parse("{{a|{{b}}|{{c|d={{f}}}}}}") + 
code2.replace(code2.get(0).params[0].value.get(0), "x", recursive=True) + code2.replace("{{f}}", "y", recursive=True) + self.assertEqual("{{a|x|{{c|d=y}}}}", code2) + self.assertRaises(ValueError, code2.replace, "y", "z", recursive=False) def test_append(self): """test Wikicode.append()""" - pass + code = parse("Have a {{template}}") + code.append("{{{argument}}}") + self.assertEqual("Have a {{template}}{{{argument}}}", code) + self.assertIsInstance(code.get(2), Argument) + code.append(None) + self.assertEqual("Have a {{template}}{{{argument}}}", code) + code.append(Text(" foo")) + self.assertEqual("Have a {{template}}{{{argument}}} foo", code) + self.assertRaises(ValueError, code.append, slice(0, 1)) def test_remove(self): """test Wikicode.remove()""" - pass + code = parse("{{a}}{{b}}{{c}}{{d}}") + code.remove("{{b}}", recursive=True) + code.remove(code.get(1), recursive=True) + self.assertEqual("{{a}}{{d}}", code) + self.assertRaises(ValueError, code.remove, "{{r}}", recursive=True) + self.assertRaises(ValueError, code.remove, "{{r}}", recursive=False) + + code2 = parse("{{a|{{b}}|{{c|d={{f}}{{h}}}}}}") + code2.remove(code2.get(0).params[0].value.get(0), recursive=True) + code2.remove("{{f}}", recursive=True) + self.assertEqual("{{a||{{c|d={{h}}}}}}", code2) + self.assertRaises(ValueError, code2.remove, "{{h}}", recursive=False) def test_filter_family(self): """test the Wikicode.i?filter() family of functions""" From 17ac79e79660e3775e3e06dde254d122515a08da Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 11 May 2013 15:58:45 -0400 Subject: [PATCH 152/180] Build filter methods dynamically. --- mwparserfromhell/wikicode.py | 97 ++++++++++++++------------------------------ 1 file changed, 31 insertions(+), 66 deletions(-) diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index b704590..4750094 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -23,7 +23,7 @@ from __future__ import unicode_literals import re -from .compat import maxsize, str +from .compat import maxsize, py3k, str from .nodes import Heading, Node, Tag, Template, Text, Wikilink from .string_mixin import StringMixIn from .utils import parse_anything @@ -291,46 +291,36 @@ class Wikicode(StringMixIn): *flags*. If *forcetype* is given, only nodes that are instances of this type are yielded. """ - if recursive: - nodes = self._get_all_nodes(self) - else: - nodes = self.nodes - for node in nodes: + for node in (self._get_all_nodes(self) if recursive else self.nodes): if not forcetype or isinstance(node, forcetype): if not matches or re.search(matches, str(node), flags): yield node - def ifilter_links(self, recursive=False, matches=None, flags=FLAGS): - """Iterate over wikilink nodes. - - This is equivalent to :py:meth:`ifilter` with *forcetype* set to - :py:class:`~.Wikilink`. - """ - return self.ifilter(recursive, matches, flags, forcetype=Wikilink) - - def ifilter_templates(self, recursive=False, matches=None, flags=FLAGS): - """Iterate over template nodes. - - This is equivalent to :py:meth:`ifilter` with *forcetype* set to - :py:class:`~.Template`. - """ - return self.filter(recursive, matches, flags, forcetype=Template) - - def ifilter_text(self, recursive=False, matches=None, flags=FLAGS): - """Iterate over text nodes. - - This is equivalent to :py:meth:`ifilter` with *forcetype* set to - :py:class:`~.nodes.Text`. + @classmethod + def _build_filter_methods(cls, meths): + """Given a dict of Node types, build corresponding i?filter shortcuts. 
+ + The dict should be given as keys storing the method's base name paired + with values storing the corresponding :py:class:`~.Node` type. For + example, the dict may contain the pair ``("templates", Template)``, + which will produce the methods :py:meth:`ifilter_templates` and + :py:meth:`filter_templates`, which are shortcuts for + :py:meth:`ifilter(forcetype=Template) ` and + :py:meth:`filter(forcetype=Template) `, respectively. These + shortcuts are added to the class itself, with an appropriate docstring. """ - return self.filter(recursive, matches, flags, forcetype=Text) - - def ifilter_tags(self, recursive=False, matches=None, flags=FLAGS): - """Iterate over tag nodes. + doc = """Iterate over {0}. - This is equivalent to :py:meth:`ifilter` with *forcetype* set to - :py:class:`~.Tag`. + This is equivalent to :py:meth:`{1}` with *forcetype* set to + :py:class:`~.{2}`. """ - return self.ifilter(recursive, matches, flags, forcetype=Tag) + for name, forcetype in (meths.items() if py3k else meths.iteritems()): + ifil = lambda self, **kw: self.ifilter(forcetype=forcetype, **kw) + fil = lambda self, **kw: self.filter(forcetype=forcetype, **kw) + ifil.__doc__ = doc.format(name, "ifilter", forcetype) + fil.__doc__ = doc.format(name, "filter", forcetype) + setattr(cls, "ifilter_" + name, ifil) + setattr(cls, "filter_" + name, fil) def filter(self, recursive=False, matches=None, flags=FLAGS, forcetype=None): @@ -340,38 +330,6 @@ class Wikicode(StringMixIn): """ return list(self.ifilter(recursive, matches, flags, forcetype)) - def filter_links(self, recursive=False, matches=None, flags=FLAGS): - """Return a list of wikilink nodes. - - This is equivalent to calling :py:func:`list` on - :py:meth:`ifilter_links`. - """ - return list(self.ifilter_links(recursive, matches, flags)) - - def filter_templates(self, recursive=False, matches=None, flags=FLAGS): - """Return a list of template nodes. - - This is equivalent to calling :py:func:`list` on - :py:meth:`ifilter_templates`. - """ - return list(self.ifilter_templates(recursive, matches, flags)) - - def filter_text(self, recursive=False, matches=None, flags=FLAGS): - """Return a list of text nodes. - - This is equivalent to calling :py:func:`list` on - :py:meth:`ifilter_text`. - """ - return list(self.ifilter_text(recursive, matches, flags)) - - def filter_tags(self, recursive=False, matches=None, flags=FLAGS): - """Return a list of tag nodes. - - This is equivalent to calling :py:func:`list` on - :py:meth:`ifilter_tags`. - """ - return list(self.ifilter_tags(recursive, matches, flags)) - def get_sections(self, flat=True, matches=None, levels=None, flags=FLAGS, include_headings=True): """Return a list of sections within the page. @@ -470,3 +428,10 @@ class Wikicode(StringMixIn): """ marker = object() # Random object we can find with certainty in a list return "\n".join(self._get_tree(self, [], marker, 0)) + +Wikicode._build_filter_methods({ + "links": Wikilink, + "templates": Template, + "text": Text, + "tag": Tag + }) From f700914caf895ff7a6ac628797e7a337ee53e4be Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 11 May 2013 19:21:24 -0400 Subject: [PATCH 153/180] Cleanup Wikicode's filter functions; implement test_filter_family(). 
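
Note: the version of _build_filter_methods() introduced in the previous patch
binds its lambdas directly inside the loop, so every generated shortcut
closes over the same loop variable and ends up filtering on whichever type
was registered last. The cleanup below routes each type through a factory
function instead. A minimal sketch of the pitfall, illustrative only and not
project code:

    # Each lambda sees the loop variable itself, which is 2 by the time
    # any of them runs:
    funcs = [lambda: i for i in range(3)]
    print([f() for f in funcs])   # [2, 2, 2]

    # A factory function binds the *current* value instead:
    make = lambda n: (lambda: n)
    funcs = [make(i) for i in range(3)]
    print([f() for f in funcs])   # [0, 1, 2]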
---
 mwparserfromhell/wikicode.py | 69 +++++++++++++++++++++++---------------------
 tests/test_wikicode.py       | 65 +++++++++++++++++++++++++++++++++++++++--
 2 files changed, 99 insertions(+), 35 deletions(-)

diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py
index 4750094..365eab7 100644
--- a/mwparserfromhell/wikicode.py
+++ b/mwparserfromhell/wikicode.py
@@ -24,7 +24,8 @@ from __future__ import unicode_literals
 import re
 
 from .compat import maxsize, py3k, str
-from .nodes import Heading, Node, Tag, Template, Text, Wikilink
+from .nodes import (Argument, Comment, Heading, HTMLEntity, Node, Tag,
+                    Template, Text, Wikilink)
 from .string_mixin import StringMixIn
 from .utils import parse_anything
 
@@ -151,6 +152,36 @@ class Wikicode(StringMixIn):
             node.__showtree__(write, get, mark)
         return lines
 
+    @classmethod
+    def _build_filter_methods(cls, **meths):
+        """Given Node types, build the corresponding i?filter shortcuts.
+
+        The methods should be given as keyword arguments pairing each
+        method's base name with the corresponding :py:class:`~.Node` type.
+        For example, the pair ``templates=Template`` will produce the methods
+        :py:meth:`ifilter_templates` and :py:meth:`filter_templates`, which
+        are shortcuts for :py:meth:`ifilter(forcetype=Template) <ifilter>`
+        and :py:meth:`filter(forcetype=Template) <filter>`, respectively.
+        These shortcuts are added to the class itself, with an appropriate
+        docstring.
+        """
+        doc = """Iterate over {0}.
+
+        This is equivalent to :py:meth:`{1}` with *forcetype* set to
+        :py:class:`~.{2}`.
+        """
+        make_ifilter = lambda ftype: (lambda self, **kw:
+                                      self.ifilter(forcetype=ftype, **kw))
+        make_filter = lambda ftype: (lambda self, **kw:
+                                     self.filter(forcetype=ftype, **kw))
+        for name, ftype in (meths.items() if py3k else meths.iteritems()):
+            ifilter = make_ifilter(ftype)
+            filter = make_filter(ftype)
+            ifilter.__doc__ = doc.format(name, "ifilter", ftype.__name__)
+            filter.__doc__ = doc.format(name, "filter", ftype.__name__)
+            setattr(cls, "ifilter_" + name, ifilter)
+            setattr(cls, "filter_" + name, filter)
+
     @property
     def nodes(self):
         """A list of :py:class:`~.Node` objects.
@@ -296,32 +327,6 @@ class Wikicode(StringMixIn):
             if not matches or re.search(matches, str(node), flags):
                 yield node
 
-    @classmethod
-    def _build_filter_methods(cls, meths):
-        """Given a dict of Node types, build corresponding i?filter shortcuts.
-
-        The dict should be given as keys storing the method's base name paired
-        with values storing the corresponding :py:class:`~.Node` type. For
-        example, the dict may contain the pair ``("templates", Template)``,
-        which will produce the methods :py:meth:`ifilter_templates` and
-        :py:meth:`filter_templates`, which are shortcuts for
-        :py:meth:`ifilter(forcetype=Template) <ifilter>` and
-        :py:meth:`filter(forcetype=Template) <filter>`, respectively. These
-        shortcuts are added to the class itself, with an appropriate docstring.
-        """
-        doc = """Iterate over {0}.
-
-        This is equivalent to :py:meth:`{1}` with *forcetype* set to
-        :py:class:`~.{2}`.
- """ - for name, forcetype in (meths.items() if py3k else meths.iteritems()): - ifil = lambda self, **kw: self.ifilter(forcetype=forcetype, **kw) - fil = lambda self, **kw: self.filter(forcetype=forcetype, **kw) - ifil.__doc__ = doc.format(name, "ifilter", forcetype) - fil.__doc__ = doc.format(name, "filter", forcetype) - setattr(cls, "ifilter_" + name, ifil) - setattr(cls, "filter_" + name, fil) - def filter(self, recursive=False, matches=None, flags=FLAGS, forcetype=None): """Return a list of nodes within our list matching certain conditions. @@ -429,9 +434,7 @@ class Wikicode(StringMixIn): marker = object() # Random object we can find with certainty in a list return "\n".join(self._get_tree(self, [], marker, 0)) -Wikicode._build_filter_methods({ - "links": Wikilink, - "templates": Template, - "text": Text, - "tag": Tag - }) +Wikicode._build_filter_methods( + arguments=Argument, comments=Comment, headings=Heading, + html_entities=HTMLEntity, tags=Tag, templates=Template, text=Text, + wikilinks=Wikilink) diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index 179d588..69600c4 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -21,6 +21,8 @@ # SOFTWARE. from __future__ import unicode_literals +import re +from types import GeneratorType import unittest from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, @@ -210,7 +212,67 @@ class TestWikicode(TreeEqualityTestCase): def test_filter_family(self): """test the Wikicode.i?filter() family of functions""" - pass + def genlist(gen): + self.assertIsInstance(gen, GeneratorType) + return list(gen) + ifilter = lambda code: (lambda **kw: genlist(code.ifilter(**kw))) + + code = parse("a{{b}}c[[d]]{{{e}}}{{f}}[[g]]") + for func in (code.filter, ifilter(code)): + self.assertEqual(["a", "{{b}}", "c", "[[d]]", "{{{e}}}", "{{f}}", + "[[g]]"], func()) + self.assertEqual(["{{{e}}}"], func(forcetype=Argument)) + self.assertIs(code.get(4), func(forcetype=Argument)[0]) + self.assertEqual(["a", "c"], func(forcetype=Text)) + self.assertEqual([], func(forcetype=Heading)) + self.assertRaises(TypeError, func, forcetype=True) + + funcs = [ + lambda name, **kw: getattr(code, "filter_" + name)(**kw), + lambda name, **kw: genlist(getattr(code, "ifilter_" + name)(**kw)) + ] + for get_filter in funcs: + self.assertEqual(["{{{e}}}"], get_filter("arguments")) + self.assertIs(code.get(4), get_filter("arguments")[0]) + self.assertEqual([], get_filter("comments")) + self.assertEqual([], get_filter("headings")) + self.assertEqual([], get_filter("html_entities")) + self.assertEqual([], get_filter("tags")) + self.assertEqual(["{{b}}", "{{f}}"], get_filter("templates")) + self.assertEqual(["a", "c"], get_filter("text")) + self.assertEqual(["[[d]]", "[[g]]"], get_filter("wikilinks")) + + code2 = parse("{{a|{{b}}|{{c|d={{f}}{{h}}}}}}") + for func in (code2.filter, ifilter(code2)): + self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}"], + func(recursive=False, forcetype=Template)) + self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}", "{{b}}", + "{{c|d={{f}}{{h}}}}", "{{f}}", "{{h}}"], + func(recursive=True, forcetype=Template)) + + code3 = parse("{{foobar}}{{FOO}}{{baz}}{{bz}}") + for func in (code3.filter, ifilter(code3)): + self.assertEqual(["{{foobar}}", "{{FOO}}"], func(matches=r"foo")) + self.assertEqual(["{{foobar}}", "{{FOO}}"], + func(matches=r"^{{foo.*?}}")) + self.assertEqual(["{{foobar}}"], + func(matches=r"^{{foo.*?}}", flags=re.UNICODE)) + self.assertEqual(["{{baz}}", "{{bz}}"], func(matches=r"^{{b.*?z")) + 
self.assertEqual(["{{baz}}"], func(matches=r"^{{b.+?z}}")) + + self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}"], + code2.filter_templates(recursive=False)) + self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}", "{{b}}", + "{{c|d={{f}}{{h}}}}", "{{f}}", "{{h}}"], + code2.filter_templates(recursive=True)) + self.assertEqual(["{{baz}}", "{{bz}}"], + code3.filter_templates(matches=r"^{{b.*?z")) + self.assertEqual([], code3.filter_tags(matches=r"^{{b.*?z")) + self.assertEqual([], code3.filter_tags(matches=r"^{{b.*?z", flags=0)) + + self.assertRaises(TypeError, code.filter_templates, 100) + self.assertRaises(TypeError, code.filter_templates, a=42) + self.assertRaises(TypeError, code.filter_templates, forcetype=Template) def test_get_sections(self): """test Wikicode.get_sections()""" @@ -224,6 +286,5 @@ class TestWikicode(TreeEqualityTestCase): """test Wikicode.get_tree()""" pass - if __name__ == "__main__": unittest.main(verbosity=2) From 0b56f2e2673339c2a096928168f64666b595ab53 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 13 May 2013 02:39:00 -0400 Subject: [PATCH 154/180] Improve Wikicode.get_sections(); implement test_get_tree(); part of test_get_sections() --- mwparserfromhell/wikicode.py | 32 +++++++++++++++++--------------- tests/test_wikicode.py | 44 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 57 insertions(+), 19 deletions(-) diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index 365eab7..f258921 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -335,34 +335,36 @@ class Wikicode(StringMixIn): """ return list(self.ifilter(recursive, matches, flags, forcetype)) - def get_sections(self, flat=True, matches=None, levels=None, flags=FLAGS, - include_headings=True): + def get_sections(self, levels=None, matches=None, flags=FLAGS, + include_lead=True, include_headings=True): """Return a list of sections within the page. Sections are returned as :py:class:`~.Wikicode` objects with a shared node list (implemented using :py:class:`~.SmartList`) so that changes to sections are reflected in the parent Wikicode object. - With *flat* as ``True``, each returned section contains all of its - subsections within the :py:class:`~.Wikicode`; otherwise, the returned - sections contain only the section up to the next heading, regardless of - its size. If *matches* is given, it should be a regex to be matched - against the titles of section headings; only sections whose headings - match the regex will be included. If *levels* is given, it should be a - iterable of integers; only sections whose heading levels are within it - will be returned. If *include_headings* is ``True``, the section's - beginning :py:class:`~.Heading` object will be included in returned - :py:class:`~.Wikicode` objects; otherwise, this is skipped. + Each section contains all of its subsections. If *levels* is given, it + should be a iterable of integers; only sections whose heading levels + are within it will be returned.If *matches* is given, it should be a + regex to be matched against the titles of section headings; only + sections whose headings match the regex will be included. *flags* can + be used to override the default regex flags (see :py:meth:`ifilter`) if + *matches* is used. + + If *include_lead* is ``True``, the first, lead section (without a + heading) will be included in the list. If *include_headings* is + ``True``, the section's beginning :py:class:`~.Heading` object will be + included; otherwise, this is skipped. 
""" if matches: matches = r"^(=+?)\s*" + matches + r"\s*\1$" - headings = self.filter(recursive=True, matches=matches, flags=flags, - forcetype=Heading) + headings = self.filter_headings(recursive=True, matches=matches, + flags=flags) if levels: headings = [head for head in headings if head.level in levels] sections = [] - buffers = [(maxsize, 0)] + buffers = [(maxsize, 0)] if include_lead else [] i = 0 while i < len(self.nodes): if self.nodes[i] in headings: diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index 69600c4..4aa07f1 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -26,11 +26,11 @@ from types import GeneratorType import unittest from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, - Tag, Template, Text, Wikilink) + Node, Tag, Template, Text, Wikilink) from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode from mwparserfromhell import parse -from mwparserfromhell.compat import str +from mwparserfromhell.compat import py3k, str from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext @@ -276,7 +276,37 @@ class TestWikicode(TreeEqualityTestCase): def test_get_sections(self): """test Wikicode.get_sections()""" - pass + page1 = "" + page2 = "==Heading==" + page3 = "===Heading===\nFoo bar baz\n====Gnidaeh====\n" + page4 = """ +This is a lead. +== Section I == +Section I body. {{and a|template}} +=== Section I.A === +Section I.A [[body]]. +=== Section I.B === +==== Section I.B.1 ==== +Section I.B.1 body. + +•Some content. + +== Section II == +Section II body. + +== Section III == +=== Section III.A === +Text. +===== Section III.A.1.a ===== +More text. +==== Section III.A.2 ==== +Even more text. +======= section III.A.2.a.i.1 ======= +An invalid section!""" + + self.assertEqual([], parse(page1).get_sections()) + self.assertEqual(["==Heading=="], parse(page2).get_sections()) + self.assertEqual(["===Heading===\nFoo bar baz\n", "====Gnidaeh====\n"], parse(page2).get_sections()) def test_strip_code(self): """test Wikicode.strip_code()""" @@ -284,7 +314,13 @@ class TestWikicode(TreeEqualityTestCase): def test_get_tree(self): """test Wikicode.get_tree()""" - pass + # Since individual nodes have test cases for their __showtree___ + # methods, and the docstring covers all possibilities, this doesn't + # need to test anything other than it: + code = parse("Lorem ipsum {{foo|bar|{{baz}}|spam=eggs}}") + expected = "Lorem ipsum \n{{\n\t foo\n\t| 1\n\t= bar\n\t| 2\n\t= " + \ + "{{\n\t\t\tbaz\n\t }}\n\t| spam\n\t= eggs\n}}" + self.assertEqual(expected.expandtabs(4), code.get_tree()) if __name__ == "__main__": unittest.main(verbosity=2) From 35acc1b812edf46bebcd19c753e170a288c20dc3 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 13 May 2013 18:10:06 -0400 Subject: [PATCH 155/180] Fix a couple bugs. 
--- mwparserfromhell/wikicode.py | 2 +- tests/test_wikicode.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index f258921..1d5de5d 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -370,7 +370,7 @@ class Wikicode(StringMixIn): if self.nodes[i] in headings: this = self.nodes[i].level for (level, start) in buffers: - if not flat or this <= level: + if this <= level: buffers.remove((level, start)) sections.append(Wikicode(self.nodes[start:i])) buffers.append((this, i)) diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index 4aa07f1..1eacb11 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -305,8 +305,8 @@ Even more text. An invalid section!""" self.assertEqual([], parse(page1).get_sections()) - self.assertEqual(["==Heading=="], parse(page2).get_sections()) - self.assertEqual(["===Heading===\nFoo bar baz\n", "====Gnidaeh====\n"], parse(page2).get_sections()) + self.assertEqual(["", "==Heading=="], parse(page2).get_sections()) + self.assertEqual(["", "===Heading===\nFoo bar baz\n====Gnidaeh====\n", "====Gnidaeh====\n"], parse(page3).get_sections()) def test_strip_code(self): """test Wikicode.strip_code()""" From 9ede1121ba4caa547a85a9e71eac4171f95eefa3 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 13 May 2013 18:44:21 -0400 Subject: [PATCH 156/180] Fix tokenizer.c on Windows; add another template test (#25) Mostly by @gdooms, with tweaks. --- mwparserfromhell/parser/tokenizer.c | 47 +++++++++++++++++++++++-------------- mwparserfromhell/parser/tokenizer.h | 1 + tests/tokenizer/templates.mwtest | 9 ++++++- 3 files changed, 39 insertions(+), 18 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 875263c..1fd4804 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -23,6 +23,11 @@ SOFTWARE. 
#include "tokenizer.h" +double log2(double n) +{ + return log(n) / log(2); +} + static PyObject* Tokenizer_new(PyTypeObject* type, PyObject* args, PyObject* kwds) { @@ -52,8 +57,9 @@ Textbuffer_new(void) static void Tokenizer_dealloc(Tokenizer* self) { - Py_XDECREF(self->text); struct Stack *this = self->topstack, *next; + Py_XDECREF(self->text); + while (this) { Py_DECREF(this->stack); Textbuffer_dealloc(this->textbuffer); @@ -139,20 +145,21 @@ Textbuffer_render(struct Textbuffer* self) static int Tokenizer_push_textbuffer(Tokenizer* self) { + PyObject *text, *kwargs, *token; struct Textbuffer* buffer = self->topstack->textbuffer; if (buffer->size == 0 && !buffer->next) return 0; - PyObject* text = Textbuffer_render(buffer); + text = Textbuffer_render(buffer); if (!text) return -1; - PyObject* kwargs = PyDict_New(); + kwargs = PyDict_New(); if (!kwargs) { Py_DECREF(text); return -1; } PyDict_SetItemString(kwargs, "text", text); Py_DECREF(text); - PyObject* token = PyObject_Call(Text, NOARGS, kwargs); + token = PyObject_Call(Text, NOARGS, kwargs); Py_DECREF(kwargs); if (!token) return -1; @@ -185,9 +192,10 @@ Tokenizer_delete_top_of_stack(Tokenizer* self) static PyObject* Tokenizer_pop(Tokenizer* self) { + PyObject* stack; if (Tokenizer_push_textbuffer(self)) return NULL; - PyObject* stack = self->topstack->stack; + stack = self->topstack->stack; Py_INCREF(stack); Tokenizer_delete_top_of_stack(self); return stack; @@ -200,11 +208,13 @@ Tokenizer_pop(Tokenizer* self) static PyObject* Tokenizer_pop_keeping_context(Tokenizer* self) { + PyObject* stack; + int context; if (Tokenizer_push_textbuffer(self)) return NULL; - PyObject* stack = self->topstack->stack; + stack = self->topstack->stack; Py_INCREF(stack); - int context = self->topstack->context; + context = self->topstack->context; Tokenizer_delete_top_of_stack(self); self->topstack->context = context; return stack; @@ -376,9 +386,10 @@ Tokenizer_read(Tokenizer* self, Py_ssize_t delta) static PyObject* Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta) { + Py_ssize_t index; if (delta > self->head) return EMPTY; - Py_ssize_t index = self->head - delta; + index = self->head - delta; return PyList_GET_ITEM(self->text, index); } @@ -392,7 +403,7 @@ Tokenizer_parse_template_or_argument(Tokenizer* self) PyObject *tokenlist; self->head += 2; - while (Tokenizer_READ(self, 0) == *"{") { + while (Tokenizer_READ(self, 0) == *"{" && braces < MAX_BRACES) { self->head++; braces++; } @@ -423,8 +434,8 @@ Tokenizer_parse_template_or_argument(Tokenizer* self) if (Tokenizer_parse_template(self)) return -1; if (BAD_ROUTE) { + char text[MAX_BRACES]; RESET_ROUTE(); - char text[braces + 1]; for (i = 0; i < braces; i++) text[i] = *"{"; text[braces] = *""; if (Tokenizer_write_text_then_stack(self, text)) { @@ -635,9 +646,10 @@ Tokenizer_handle_template_end(Tokenizer* self) static int Tokenizer_handle_argument_separator(Tokenizer* self) { + PyObject* token; self->topstack->context ^= LC_ARGUMENT_NAME; self->topstack->context |= LC_ARGUMENT_DEFAULT; - PyObject* token = PyObject_CallObject(ArgumentSeparator, NULL); + token = PyObject_CallObject(ArgumentSeparator, NULL); if (!token) return -1; if (Tokenizer_write(self, token)) { @@ -654,8 +666,8 @@ Tokenizer_handle_argument_separator(Tokenizer* self) static PyObject* Tokenizer_handle_argument_end(Tokenizer* self) { - self->head += 2; PyObject* stack = Tokenizer_pop(self); + self->head += 2; return stack; } @@ -716,9 +728,10 @@ Tokenizer_parse_wikilink(Tokenizer* self) static int 
 Tokenizer_handle_wikilink_separator(Tokenizer* self)
 {
+    PyObject* token;
     self->topstack->context ^= LC_WIKILINK_TITLE;
     self->topstack->context |= LC_WIKILINK_TEXT;
-    PyObject* token = PyObject_CallObject(WikilinkSeparator, NULL);
+    token = PyObject_CallObject(WikilinkSeparator, NULL);
     if (!token)
         return -1;
     if (Tokenizer_write(self, token)) {
@@ -735,8 +748,8 @@
 static PyObject*
 Tokenizer_handle_wikilink_end(Tokenizer* self)
 {
-    self->head += 1;
     PyObject* stack = Tokenizer_pop(self);
+    self->head += 1;
     return stack;
 }
@@ -1093,9 +1106,9 @@ Tokenizer_parse_comment(Tokenizer* self)
     self->head += 4;
     comment = Tokenizer_parse(self, LC_COMMENT);
     if (BAD_ROUTE) {
+        const char* text = "<!--";

+output: [CommentStart(), CommentEnd()]
+
+---
+
+name: basic
+label: a basic comment
+input: "<!-- comment -->"
+output: [CommentStart(), Text(text=" comment "), CommentEnd()]
+
+---
+
+name: tons_of_nonsense
+label: a comment with tons of ignorable garbage in it
+input: ""
+output: [CommentStart(), Text(text=" foo{{bar}}[[basé\n\n]{}{}{}{}]{{{{{{haha{{--a>aabsp;"
+
+---
+
+input: "&n<!--foo-->bsp;"
+output: [Text(text="&n"), CommentStart(), Text(text="foo"), CommentEnd(), Text(text="bsp;")]
+
+---
+
+name: wildcard
+label: a wildcard assortment of various things
+input: "{{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}}"
+output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), Text(text="biz"), TemplateClose(), Text(text="buzz"), TemplateClose(), Text(text="usr"), TemplateParamSeparator(), TemplateOpen(), Text(text="bin"), TemplateClose(), TemplateClose()]
+
+---
+
+name: wildcard_redux
+label: an even wilder assortment of various things
+input: "{{a|b|{{c|[[d]]{{{e}}}}}}}[[f|{{{g}}}<!--h-->]]{{i|j=&nbsp;}}"
+output: [TemplateOpen(), Text(text="a"), TemplateParamSeparator(), Text(text="b"), TemplateParamSeparator(), TemplateOpen(), Text(text="c"), TemplateParamSeparator(), WikilinkOpen(), Text(text="d"), WikilinkClose(), ArgumentOpen(), Text(text="e"), ArgumentClose(), TemplateClose(), TemplateClose(), WikilinkOpen(), Text(text="f"), WikilinkSeparator(), ArgumentOpen(), Text(text="g"), ArgumentClose(), CommentStart(), Text(text="h"), CommentEnd(), WikilinkClose(), TemplateOpen(), Text(text="i"), TemplateParamSeparator(), Text(text="j"), TemplateParamEquals(), HTMLEntityStart(), Text(text="nbsp"), HTMLEntityEnd(), TemplateClose()]

From 22e869b1429dabd30976e4bdb8b819ed240c3f29 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Sun, 19 May 2013 01:45:09 -0400
Subject: [PATCH 169/180] Fix a failing HTML entity test in the C tokenizer.
 Remove some extraneous whitespace in string_mixin.py.
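
The failing test involves a numeric HTML entity padded with leading zeroes
(e.g. "&#0000000107;" -- the exact test string here is illustrative). The C
tokenizer skips those zeroes while validating the digits, so the entity's
text came back without them. The fix below counts the skipped zeroes and
glues them back on; the gist of it, sketched in Python (not project code):

    def rebuild_entity_digits(digits, zeroes):
        # Re-attach the leading zeroes skipped during scanning so the
        # original wikitext round-trips unchanged:
        return "0" * zeroes + digits

    assert rebuild_entity_digits("107", 7) == "0000000107"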
--- mwparserfromhell/parser/tokenizer.c | 19 +++++++++++++++++-- mwparserfromhell/string_mixin.py | 1 - 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index df0882e..939f30c 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -911,8 +911,8 @@ Tokenizer_really_parse_entity(Tokenizer* self) { PyObject *token, *kwargs, *textobj; Py_UNICODE this; - int numeric, hexadecimal, i, j, test; - char *valid, *text, *def; + int numeric, hexadecimal, i, j, zeroes, test; + char *valid, *text, *buffer, *def; #define FAIL_ROUTE_AND_EXIT() { \ Tokenizer_fail_route(self); \ @@ -984,6 +984,7 @@ Tokenizer_really_parse_entity(Tokenizer* self) return -1; } i = 0; + zeroes = 0; while (1) { this = Tokenizer_READ(self, 0); if (this == *";") { @@ -992,6 +993,7 @@ Tokenizer_really_parse_entity(Tokenizer* self) break; } if (i == 0 && this == *"0") { + zeroes++; self->head++; continue; } @@ -1029,6 +1031,19 @@ Tokenizer_really_parse_entity(Tokenizer* self) i++; } } + if (zeroes) { + buffer = calloc(strlen(text) + zeroes + 1, sizeof(char)); + if (!buffer) { + free(text); + PyErr_NoMemory(); + return -1; + } + for (i = 0; i < zeroes; i++) + strcat(buffer, "0"); + strcat(buffer, text); + free(text); + text = buffer; + } textobj = PyUnicode_FromString(text); if (!textobj) { free(text); diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index 6bee9c4..89c1bc0 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -40,7 +40,6 @@ def inheritdoc(method): method.__doc__ = getattr(str, method.__name__).__doc__ return method - class StringMixIn(object): """Implement the interface for ``unicode``/``str`` in a dynamic manner. From ac9b64bf60741232c9d1f2210d287f2e1d481e80 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 1 Jun 2013 15:35:40 -0400 Subject: [PATCH 170/180] Travis integration (closes #33) --- .travis.yml | 6 ++++++ README.rst | 2 ++ 2 files changed, 8 insertions(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..71b8eb6 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,6 @@ +language: python +python: + - "2.7" + - "3.3" +install: +script: python setup.py test -q diff --git a/README.rst b/README.rst index 9847c33..0be8fd5 100644 --- a/README.rst +++ b/README.rst @@ -1,6 +1,8 @@ mwparserfromhell ================ +[![Build Status](https://secure.travis-ci.org/earwig/mwparserfromhell.png?branch=develop)](http://travis-ci.org/earwig/mwparserfromhell) + **mwparserfromhell** (the *MediaWiki Parser from Hell*) is a Python package that provides an easy-to-use and outrageously powerful parser for MediaWiki_ wikicode. It supports Python 2 and Python 3. From b2f0f23ea45cb79967e11acc1a14ba4418411913 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 1 Jun 2013 15:48:37 -0400 Subject: [PATCH 171/180] Fix README. 
--- .travis.yml | 2 +- README.rst | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 71b8eb6..7a9920d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,5 +2,5 @@ language: python python: - "2.7" - "3.3" -install: +install: python setup.py build script: python setup.py test -q diff --git a/README.rst b/README.rst index 0be8fd5..267f7ea 100644 --- a/README.rst +++ b/README.rst @@ -1,7 +1,9 @@ mwparserfromhell ================ -[![Build Status](https://secure.travis-ci.org/earwig/mwparserfromhell.png?branch=develop)](http://travis-ci.org/earwig/mwparserfromhell) +..image:: https://travis-ci.org/earwig/mwparserfromhell.png?branch=develop + :alt: Build Status + :target: http://travis-ci.org/earwig/mwparserfromhell **mwparserfromhell** (the *MediaWiki Parser from Hell*) is a Python package that provides an easy-to-use and outrageously powerful parser for MediaWiki_ From ce252f69ecb49864b0c4ec98e9b13ca55ce896b8 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 1 Jun 2013 16:00:40 -0400 Subject: [PATCH 172/180] Missed a space. --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 267f7ea..77c01eb 100644 --- a/README.rst +++ b/README.rst @@ -1,7 +1,7 @@ mwparserfromhell ================ -..image:: https://travis-ci.org/earwig/mwparserfromhell.png?branch=develop +.. image:: https://travis-ci.org/earwig/mwparserfromhell.png?branch=develop :alt: Build Status :target: http://travis-ci.org/earwig/mwparserfromhell From 81954c50acd88aba523c5064e63a8316692997fb Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 19 Jun 2013 00:22:30 -0400 Subject: [PATCH 173/180] Removing a useless, skipped test. --- tests/test_builder.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/test_builder.py b/tests/test_builder.py index 903d144..2d44b6c 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -190,11 +190,6 @@ class TestBuilder(TreeEqualityTestCase): for test, valid in tests: self.assertWikicodeEqual(valid, self.builder.build(test)) - @unittest.skip("holding this until feature/html_tags is ready") - def test_tag(self): - """tests for building Tag nodes""" - pass - def test_integration(self): """a test for building a combination of templates together""" # {{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}} From 3fb8f3214c91bcd63b5fe4e3a0206a05f8038c39 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 19 Jun 2013 00:39:46 -0400 Subject: [PATCH 174/180] Fix StringMixIn.maketrans() on Py3k. - Make a test in Py3k actually use StringMixIn instead of str. - Minor cosmetic fix. 
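
The underlying bug: maketrans() was declared as a @staticmethod but still
listed `self` as its first parameter, so the caller's first real argument
was swallowed by `self`. A minimal sketch of the failure mode, illustrative
only and not project code:

    class Broken(object):
        @staticmethod
        def maketrans(self, x, y=None):
            # With @staticmethod, `self` is just an ordinary parameter:
            return (self, x, y)

    # "abc" lands in `self`, shifting every argument over by one:
    print(Broken.maketrans("abc", "xyz"))   # ('abc', 'xyz', None)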
--- mwparserfromhell/nodes/template.py | 2 +- mwparserfromhell/string_mixin.py | 8 ++++---- tests/test_string_mixin.py | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index 3834d41..6dfc4f0 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -293,7 +293,7 @@ class Template(Node): """ name = name.strip() if isinstance(name, basestring) else str(name) removed = False - to_remove =[] + to_remove = [] for i, param in enumerate(self.params): if param.name.strip() == name: if keep_field: diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index 89c1bc0..a406401 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -253,12 +253,12 @@ class StringMixIn(object): if py3k: @staticmethod @inheritdoc - def maketrans(self, x, y=None, z=None): + def maketrans(x, y=None, z=None): if z is None: if y is None: - return self.__unicode__.maketrans(x) - return self.__unicode__.maketrans(x, y) - return self.__unicode__.maketrans(x, y, z) + return str.maketrans(x) + return str.maketrans(x, y) + return str.maketrans(x, y, z) @inheritdoc def partition(self, sep): diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 306f2fd..b829bb2 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -414,10 +414,10 @@ class TestStringMixIn(unittest.TestCase): self.assertEqual("Fake String", str1.title()) if py3k: - table1 = str.maketrans({97: "1", 101: "2", 105: "3", 111: "4", - 117: "5"}) - table2 = str.maketrans("aeiou", "12345") - table3 = str.maketrans("aeiou", "12345", "rts") + table1 = StringMixIn.maketrans({97: "1", 101: "2", 105: "3", + 111: "4", 117: "5"}) + table2 = StringMixIn.maketrans("aeiou", "12345") + table3 = StringMixIn.maketrans("aeiou", "12345", "rts") self.assertEqual("f1k2 str3ng", str1.translate(table1)) self.assertEqual("f1k2 str3ng", str1.translate(table2)) self.assertEqual("f1k2 3ng", str1.translate(table3)) From 22d7995d9b6c47407e0f130df8146debe03c6066 Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 19 Jun 2013 21:07:41 -0400 Subject: [PATCH 175/180] Fix newline behavior when loading test files on Windows. --- tests/_test_tokenizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 382a9bf..c1d49cb 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -109,7 +109,7 @@ class TokenizerTestCase(object): def build(cls): """Load and install all tests from the 'tokenizer' directory.""" def load_file(filename): - with open(filename, "r") as fp: + with open(filename, "rU") as fp: text = fp.read() if not py3k: text = text.decode("utf8") From 25a9f4fe327d5fc95a5b1fb8302934a2b1d03294 Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 19 Jun 2013 21:08:34 -0400 Subject: [PATCH 176/180] Add .dll to .gitignore for builds on Windows. --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index ec4e8ca..4068716 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ *.pyc *.so +*.dll *.egg *.egg-info .DS_Store From a68946757758a7c6936dbe8c8c9295ef263ca97d Mon Sep 17 00:00:00 2001 From: Ben Date: Thu, 20 Jun 2013 16:17:39 -0400 Subject: [PATCH 177/180] Replace broken log2 function; add a missing comment. 
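
The hand-rolled log2() from patch 156 presumably clashes with the C
library's own log2() where one exists; in any case floating-point math is
unnecessary, since the heading level is encoded as a single bit in the
context field. Equivalent logic in Python (a sketch; the constant's value
below is a placeholder, not the real one from tokenizer.h):

    LC_HEADING_LEVEL_1 = 1 << 10   # placeholder bit position

    def heading_level_from_context(context):
        n = context // LC_HEADING_LEVEL_1
        level = 1
        while n > 1:
            n >>= 1
            level += 1
        return level

    # Level 4 is stored three bits above level 1:
    assert heading_level_from_context(LC_HEADING_LEVEL_1 << 3) == 4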
--- mwparserfromhell/parser/tokenizer.c | 16 +++++++++++++--- mwparserfromhell/parser/tokenizer.h | 1 + 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 939f30c..df65d0e 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -23,9 +23,16 @@ SOFTWARE. #include "tokenizer.h" -double log2(double n) +/* + Given a context, return the heading level encoded within it. +*/ +static int heading_level_from_context(int n) { - return log(n) / log(2); + int level; + n /= LC_HEADING_LEVEL_1; + for (level = 1; n > 1; n >>= 1) + level++; + return level; } static PyObject* @@ -175,6 +182,9 @@ Tokenizer_push_textbuffer(Tokenizer* self) return 0; } +/* + Pop and deallocate the top token stack/context/textbuffer. +*/ static void Tokenizer_delete_top_of_stack(Tokenizer* self) { @@ -858,7 +868,7 @@ Tokenizer_handle_heading_end(Tokenizer* self) best++; self->head++; } - current = log2(self->topstack->context / LC_HEADING_LEVEL_1) + 1; + current = heading_level_from_context(self->topstack->context); level = current > best ? (best > 6 ? 6 : best) : (current > 6 ? 6 : current); after = (HeadingData*) Tokenizer_parse(self, self->topstack->context); diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index cdc0cca..1f58c49 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -181,6 +181,7 @@ typedef struct { /* Function prototypes: */ +static int heading_level_from_context(int); static PyObject* Tokenizer_new(PyTypeObject*, PyObject*, PyObject*); static struct Textbuffer* Textbuffer_new(void); static void Tokenizer_dealloc(Tokenizer*); From 72473b433a8219c28245c0d560e9bb30f4df30de Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 20 Jun 2013 17:47:13 -0400 Subject: [PATCH 178/180] Adding a changelog (closes #23) --- CHANGELOG | 33 +++++++++++++++++++++++++++++++ docs/changelog.rst | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ docs/index.rst | 1 + 3 files changed, 92 insertions(+) create mode 100644 CHANGELOG create mode 100644 docs/changelog.rst diff --git a/CHANGELOG b/CHANGELOG new file mode 100644 index 0000000..9772f8b --- /dev/null +++ b/CHANGELOG @@ -0,0 +1,33 @@ +v0.1.1 (19da4d2144) to v0.2: + +- The parser now fully supports Python 3 in addition to Python 2.7. +- Added a C tokenizer extension that is significantly faster than its Python + equivalent. It is enabled by default (if available) and can be toggled by + setting `mwparserfromhell.parser.use_c` to a boolean value. +- Added a complete set of unit tests covering parsing and wikicode + manipulation. +- Renamed Wikicode.filter_links() to filter_wikilinks() (applies to ifilter as + well). +- Added filter methods for Arguments, Comments, Headings, and HTMLEntities. +- Added 'before' param to Template.add(); renamed 'force_nonconformity' to + 'preserve_spacing'. +- Added 'include_lead' param to Wikicode.get_sections(). +- Removed 'flat' param from Wikicode.get_sections(). +- Removed 'force_no_field' param from Template.remove(). +- Added support for Travis CI. +- Added note about Windows build issue in the README. +- The tokenizer will limit itself to a realistic recursion depth to prevent + errors and unreasonably long parse times. +- Fixed how some nodes' attribute setters handle input. +- Fixed multiple bugs in the tokenizer's handling of invalid markup. +- Fixed bugs in the implementation of SmartList and StringMixIn. 
+- Fixed some broken example code in the README; other copyedits.
+- Other bugfixes and code cleanup.
+
+v0.1 (ba94938fe8) to v0.1.1 (19da4d2144):
+
+- Added support for Comments (<!-- -->) and Wikilinks ([[foo]]).
+- Added corresponding ifilter_links() and filter_links() methods to Wikicode.
+- Fixed a bug when parsing incomplete templates.
+- Fixed strip_code() to affect the contents of headings.
+- Various copyedits in documentation and comments.
diff --git a/docs/changelog.rst b/docs/changelog.rst
new file mode 100644
index 0000000..0e8bbef
--- /dev/null
+++ b/docs/changelog.rst
@@ -0,0 +1,58 @@
+Changelog
+=========
+
+v0.2
+----
+
+19da4d2144_ to master_ (released June 20, 2013)
+
+- The parser now fully supports Python 3 in addition to Python 2.7.
+- Added a C tokenizer extension that is significantly faster than its Python
+  equivalent. It is enabled by default (if available) and can be toggled by
+  setting :py:attr:`mwparserfromhell.parser.use_c` to a boolean value.
+- Added a complete set of unit tests covering parsing and wikicode
+  manipulation.
+- Renamed :py:meth:`.filter_links` to :py:meth:`.filter_wikilinks` (applies to
+  :py:meth:`.ifilter` as well).
+- Added filter methods for :py:class:`Arguments <.Argument>`,
+  :py:class:`Comments <.Comment>`, :py:class:`Headings <.Heading>`, and
+  :py:class:`HTMLEntities <.HTMLEntity>`.
+- Added *before* param to :py:meth:`Template.add() <.Template.add>`; renamed
+  *force_nonconformity* to *preserve_spacing*.
+- Added *include_lead* param to :py:meth:`Wikicode.get_sections()
+  <.get_sections>`.
+- Removed *flat* param from :py:meth:`.get_sections`.
+- Removed *force_no_field* param from :py:meth:`Template.remove()
+  <.Template.remove>`.
+- Added support for Travis CI.
+- Added note about Windows build issue in the README.
+- The tokenizer will limit itself to a realistic recursion depth to prevent
+  errors and unreasonably long parse times.
+- Fixed how some nodes' attribute setters handle input.
+- Fixed multiple bugs in the tokenizer's handling of invalid markup.
+- Fixed bugs in the implementation of :py:class:`.SmartList` and
+  :py:class:`.StringMixIn`.
+- Fixed some broken example code in the README; other copyedits.
+- Other bugfixes and code cleanup.
+
+v0.1.1
+------
+
+ba94938fe8_ to 19da4d2144_ (released September 21, 2012)
+
+- Added support for :py:class:`Comments <.Comment>` (``<!-- -->``) and
+  :py:class:`Wikilinks <.Wikilink>` (``[[foo]]``).
+- Added corresponding :py:meth:`.ifilter_links` and :py:meth:`.filter_links`
+  methods to :py:class:`.Wikicode`.
+- Fixed a bug when parsing incomplete templates.
+- Fixed :py:meth:`.strip_code` to affect the contents of headings.
+- Various copyedits in documentation and comments.
+
+v0.1
+----
+
+ba94938fe8_ (released August 23, 2012)
+
+.. _master: https://github.com/earwig/mwparserfromhell/tree/v0.2
+.. _19da4d2144: https://github.com/earwig/mwparserfromhell/tree/v0.1.1
+.. _ba94938fe8: https://github.com/earwig/mwparserfromhell/tree/v0.1
diff --git a/docs/index.rst b/docs/index.rst
index 4b4c392..4355b61 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -41,6 +41,7 @@ Contents
 
    usage
    integration
+   changelog
 
 API Reference
From bbaf09dbf8fc2795c424f0934e4dce9924edb009 Mon Sep 17 00:00:00 2001
From: Ben Kurtovic
Date: Thu, 20 Jun 2013 18:07:41 -0400
Subject: [PATCH 179/180] Fix docstrings of generated filter methods.
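
The generated docstrings previously interpolated only the type's bare name,
producing references like :py:class:`~.Template` that Sphinx cannot always
resolve; passing the class object itself lets str.format() pull both the
module and the name via attribute access. A quick sketch of the mechanism,
illustrative only and not project code:

    class Template(object):
        pass

    doc = "equivalent to :py:class:`~{0.__module__}.{0.__name__}`"
    print(doc.format(Template))
    # -> equivalent to :py:class:`~__main__.Template`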
--- mwparserfromhell/wikicode.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index 581707d..4ec889e 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -168,7 +168,7 @@ class Wikicode(StringMixIn): doc = """Iterate over {0}. This is equivalent to :py:meth:`{1}` with *forcetype* set to - :py:class:`~.{2}`. + :py:class:`~{2.__module__}.{2.__name__}`. """ make_ifilter = lambda ftype: (lambda self, **kw: self.ifilter(forcetype=ftype, **kw)) @@ -177,8 +177,8 @@ class Wikicode(StringMixIn): for name, ftype in (meths.items() if py3k else meths.iteritems()): ifilter = make_ifilter(ftype) filter = make_filter(ftype) - ifilter.__doc__ = doc.format(name, "ifilter", ftype.__name__) - filter.__doc__ = doc.format(name, "filter", ftype.__name__) + ifilter.__doc__ = doc.format(name, "ifilter", ftype) + filter.__doc__ = doc.format(name, "filter", ftype) setattr(cls, "ifilter_" + name, ifilter) setattr(cls, "filter_" + name, filter) From edf6a3a8a6ad4c31cf8649a273b4e4d0e275003a Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 20 Jun 2013 18:13:52 -0400 Subject: [PATCH 180/180] release/0.2 --- mwparserfromhell/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py index 99bc0c2..5db2d4c 100644 --- a/mwparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -31,7 +31,7 @@ from __future__ import unicode_literals __author__ = "Ben Kurtovic" __copyright__ = "Copyright (C) 2012, 2013 Ben Kurtovic" __license__ = "MIT License" -__version__ = "0.2.dev" +__version__ = "0.2" __email__ = "ben.kurtovic@verizon.net" from . import compat, nodes, parser, smart_list, string_mixin, utils, wikicode
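
With that version bump the series closes out the 0.2 release described in
the changelog above. A minimal end-to-end sketch of the API this series
finished, assuming the package is built and installed:

    import mwparserfromhell

    code = mwparserfromhell.parse("Hello {{foo|bar}} and [[world]]!")
    print([str(t) for t in code.filter_templates()])    # ['{{foo|bar}}']
    print([str(w) for w in code.filter_wikilinks()])    # ['[[world]]']

    # The i-prefixed variants are generators over the same nodes:
    print([str(t) for t in code.ifilter_templates()])   # ['{{foo|bar}}']
    print(mwparserfromhell.__version__)                 # '0.2'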