@@ -1,4 +1,5 @@ | |||
*.pyc | |||
*.so | |||
*.egg | |||
*.egg-info | |||
.DS_Store | |||
@@ -18,7 +18,8 @@ so you can install the latest release with ``pip install mwparserfromhell`` | |||
cd mwparserfromhell | |||
python setup.py install | |||
You can run the comprehensive unit testing suite with ``python setup.py test``. | |||
You can run the comprehensive unit testing suite with | |||
``python setup.py test -q``. | |||
Usage | |||
----- | |||
@@ -124,7 +125,9 @@ following code (via the API_):: | |||
import mwparserfromhell | |||
API_URL = "http://en.wikipedia.org/w/api.php" | |||
def parse(title): | |||
raw = urllib.urlopen(API_URL, data).read() | |||
data = {"action": "query", "prop": "revisions", "rvlimit": 1, | |||
"rvprop": "content", "format": "json", "titles": title} | |||
raw = urllib.urlopen(API_URL, urllib.urlencode(data)).read() | |||
res = json.loads(raw) | |||
text = res["query"]["pages"].values()[0]["revisions"][0]["*"] | |||
return mwparserfromhell.parse(text) | |||
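The ``Wikicode`` object returned above can be explored with the usual filter
methods; a minimal sketch (the page title ``"Test"`` is only an example)::

    code = parse("Test")
    for template in code.filter_templates():
        print(template.name)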
@@ -1,29 +1,29 @@ | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Implements support for both Python 2 and Python 3 by defining common types in | |||
terms of their Python 2/3 variants. For example, :py:class:`str` is set to | |||
:py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise, | |||
:py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These | |||
types are meant to be imported directly from within the parser's modules. | |||
""" | |||
import sys | |||
py3k = sys.version_info[0] == 3 | |||
if py3k: | |||
bytes = bytes | |||
str = str | |||
basestring = str | |||
maxsize = sys.maxsize | |||
import html.entities as htmlentities | |||
else: | |||
bytes = str | |||
str = unicode | |||
basestring = basestring | |||
maxsize = sys.maxint | |||
import htmlentitydefs as htmlentities | |||
del sys | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Implements support for both Python 2 and Python 3 by defining common types in | |||
terms of their Python 2/3 variants. For example, :py:class:`str` is set to | |||
:py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise, | |||
:py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These | |||
types are meant to be imported directly from within the parser's modules. | |||
""" | |||
import sys | |||
py3k = sys.version_info[0] == 3 | |||
if py3k: | |||
bytes = bytes | |||
str = str | |||
basestring = str | |||
maxsize = sys.maxsize | |||
import html.entities as htmlentities | |||
else: | |||
bytes = str | |||
str = unicode | |||
basestring = basestring | |||
maxsize = sys.maxint | |||
import htmlentitydefs as htmlentities | |||
del sys |
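For reference, modules elsewhere in the package pick these names up straight
from ``compat`` instead of branching on the interpreter version themselves; a
minimal sketch (the helper function is hypothetical)::

    from mwparserfromhell.compat import htmlentities, str

    def is_known_entity(name):
        # True for named HTML entities such as "nbsp", on Python 2 and 3 alike
        return str(name) in htmlentities.entitydefs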
@@ -30,6 +30,7 @@ __all__ = ["Argument"] | |||
class Argument(Node): | |||
"""Represents a template argument substitution, like ``{{{foo}}}``.""" | |||
def __init__(self, name, default=None): | |||
super(Argument, self).__init__() | |||
self._name = name | |||
@@ -29,6 +29,7 @@ __all__ = ["Comment"] | |||
class Comment(Node): | |||
"""Represents a hidden HTML comment, like ``<!-- foobar -->``.""" | |||
def __init__(self, contents): | |||
super(Comment, self).__init__() | |||
self._contents = contents | |||
@@ -135,7 +135,10 @@ class HTMLEntity(Node): | |||
@hex_char.setter | |||
def hex_char(self, newval): | |||
self._hex_char = bool(newval) | |||
newval = str(newval) | |||
if newval not in ("x", "X"): | |||
raise ValueError(newval) | |||
self._hex_char = newval | |||
def normalize(self): | |||
"""Return the unicode character represented by the HTML entity.""" | |||
@@ -29,6 +29,7 @@ __all__ = ["Text"] | |||
class Text(Node): | |||
"""Represents ordinary, unformatted text with no special properties.""" | |||
def __init__(self, value): | |||
super(Text, self).__init__() | |||
self._value = value | |||
@@ -30,6 +30,7 @@ __all__ = ["Wikilink"] | |||
class Wikilink(Node): | |||
"""Represents an internal wikilink, like ``[[Foo|Bar]]``.""" | |||
def __init__(self, title, text=None): | |||
super(Wikilink, self).__init__() | |||
self._title = title | |||
@@ -62,6 +62,15 @@ Local (stack-specific) contexts: | |||
* :py:const:`COMMENT` | |||
* :py:const:`SAFETY_CHECK` | |||
* :py:const:`HAS_TEXT` | |||
* :py:const:`FAIL_ON_TEXT` | |||
* :py:const:`FAIL_NEXT` | |||
* :py:const:`FAIL_ON_LBRACE` | |||
* :py:const:`FAIL_ON_RBRACE` | |||
* :py:const:`FAIL_ON_EQUALS` | |||
Global contexts: | |||
* :py:const:`GL_HEADING` | |||
@@ -69,29 +78,36 @@ Global contexts: | |||
# Local contexts: | |||
TEMPLATE = 0b00000000000111 | |||
TEMPLATE_NAME = 0b00000000000001 | |||
TEMPLATE_PARAM_KEY = 0b00000000000010 | |||
TEMPLATE_PARAM_VALUE = 0b00000000000100 | |||
ARGUMENT = 0b00000000011000 | |||
ARGUMENT_NAME = 0b00000000001000 | |||
ARGUMENT_DEFAULT = 0b00000000010000 | |||
WIKILINK = 0b00000001100000 | |||
WIKILINK_TITLE = 0b00000000100000 | |||
WIKILINK_TEXT = 0b00000001000000 | |||
HEADING = 0b01111110000000 | |||
HEADING_LEVEL_1 = 0b00000010000000 | |||
HEADING_LEVEL_2 = 0b00000100000000 | |||
HEADING_LEVEL_3 = 0b00001000000000 | |||
HEADING_LEVEL_4 = 0b00010000000000 | |||
HEADING_LEVEL_5 = 0b00100000000000 | |||
HEADING_LEVEL_6 = 0b01000000000000 | |||
COMMENT = 0b10000000000000 | |||
TEMPLATE = 0b00000000000000000111 | |||
TEMPLATE_NAME = 0b00000000000000000001 | |||
TEMPLATE_PARAM_KEY = 0b00000000000000000010 | |||
TEMPLATE_PARAM_VALUE = 0b00000000000000000100 | |||
ARGUMENT = 0b00000000000000011000 | |||
ARGUMENT_NAME = 0b00000000000000001000 | |||
ARGUMENT_DEFAULT = 0b00000000000000010000 | |||
WIKILINK = 0b00000000000001100000 | |||
WIKILINK_TITLE = 0b00000000000000100000 | |||
WIKILINK_TEXT = 0b00000000000001000000 | |||
HEADING = 0b00000001111110000000 | |||
HEADING_LEVEL_1 = 0b00000000000010000000 | |||
HEADING_LEVEL_2 = 0b00000000000100000000 | |||
HEADING_LEVEL_3 = 0b00000000001000000000 | |||
HEADING_LEVEL_4 = 0b00000000010000000000 | |||
HEADING_LEVEL_5 = 0b00000000100000000000 | |||
HEADING_LEVEL_6 = 0b00000001000000000000 | |||
COMMENT = 0b00000010000000000000 | |||
SAFETY_CHECK = 0b11111100000000000000 | |||
HAS_TEXT = 0b00000100000000000000 | |||
FAIL_ON_TEXT = 0b00001000000000000000 | |||
FAIL_NEXT = 0b00010000000000000000 | |||
FAIL_ON_LBRACE = 0b00100000000000000000 | |||
FAIL_ON_RBRACE = 0b01000000000000000000 | |||
FAIL_ON_EQUALS = 0b10000000000000000000 | |||
# Global contexts: | |||
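Because each context is a single bit, code combines and tests them with plain
bitwise operators, and aggregates such as ``TEMPLATE`` or ``SAFETY_CHECK`` are
just the OR of their members. A small illustration (the variable is made up)::

    from mwparserfromhell.parser import contexts

    ctx = contexts.TEMPLATE_NAME | contexts.HAS_TEXT
    assert ctx & contexts.TEMPLATE       # somewhere inside a template
    assert ctx & contexts.SAFETY_CHECK   # at least one safety flag is set
    ctx ^= contexts.HAS_TEXT             # clear a single flag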
@@ -1,6 +1,6 @@ | |||
/* | |||
Tokenizer for MWParserFromHell | |||
Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||
this software and associated documentation files (the "Software"), to deal in | |||
@@ -843,7 +843,8 @@ Tokenizer_handle_heading_end(Tokenizer* self) | |||
self->head++; | |||
} | |||
current = log2(self->topstack->context / LC_HEADING_LEVEL_1) + 1; | |||
level = current > best ? (best > 6 ? 6 : best) : (current > 6 ? 6 : current); | |||
level = current > best ? (best > 6 ? 6 : best) : | |||
(current > 6 ? 6 : current); | |||
after = (HeadingData*) Tokenizer_parse(self, self->topstack->context); | |||
if (BAD_ROUTE) { | |||
RESET_ROUTE(); | |||
@@ -956,11 +957,11 @@ Tokenizer_really_parse_entity(Tokenizer* self) | |||
else | |||
numeric = hexadecimal = 0; | |||
if (hexadecimal) | |||
valid = "0123456789abcdefABCDEF"; | |||
valid = HEXDIGITS; | |||
else if (numeric) | |||
valid = "0123456789"; | |||
valid = DIGITS; | |||
else | |||
valid = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; | |||
valid = ALPHANUM; | |||
text = calloc(MAX_ENTITY_SIZE, sizeof(char)); | |||
if (!text) { | |||
PyErr_NoMemory(); | |||
@@ -1005,7 +1006,7 @@ Tokenizer_really_parse_entity(Tokenizer* self) | |||
i = 0; | |||
while (1) { | |||
def = entitydefs[i]; | |||
if (!def) // We've reached the end of the def list without finding it | |||
if (!def) // We've reached the end of the defs without finding it | |||
FAIL_ROUTE_AND_EXIT() | |||
if (strcmp(text, def) == 0) | |||
break; | |||
@@ -1135,48 +1136,59 @@ Tokenizer_parse_comment(Tokenizer* self) | |||
} | |||
/* | |||
Make sure we are not trying to write an invalid character. | |||
Make sure we are not trying to write an invalid character. Return 0 if | |||
everything is safe, or -1 if the route must be failed. | |||
*/ | |||
static void | |||
static int | |||
Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) | |||
{ | |||
if (context & LC_FAIL_NEXT) { | |||
Tokenizer_fail_route(self); | |||
return; | |||
return -1; | |||
} | |||
if (context & LC_WIKILINK_TITLE) { | |||
if (data == *"]" || data == *"{") | |||
self->topstack->context |= LC_FAIL_NEXT; | |||
else if (data == *"\n" || data == *"[" || data == *"}") | |||
Tokenizer_fail_route(self); | |||
return; | |||
return -1; | |||
return 0; | |||
} | |||
if (context & LC_TEMPLATE_NAME) { | |||
if (data == *"{" || data == *"}" || data == *"[") { | |||
self->topstack->context |= LC_FAIL_NEXT; | |||
return; | |||
return 0; | |||
} | |||
if (data == *"]") { | |||
Tokenizer_fail_route(self); | |||
return; | |||
return -1; | |||
} | |||
if (data == *"|") | |||
return; | |||
return 0; | |||
if (context & LC_HAS_TEXT) { | |||
if (context & LC_FAIL_ON_TEXT) { | |||
if (!Py_UNICODE_ISSPACE(data)) | |||
return -1; | |||
} | |||
else { | |||
if (data == *"\n") | |||
self->topstack->context |= LC_FAIL_ON_TEXT; | |||
} | |||
} | |||
else if (!Py_UNICODE_ISSPACE(data)) | |||
self->topstack->context |= LC_HAS_TEXT; | |||
} | |||
else if (context & (LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME)) { | |||
else { | |||
if (context & LC_FAIL_ON_EQUALS) { | |||
if (data == *"=") { | |||
Tokenizer_fail_route(self); | |||
return; | |||
return -1; | |||
} | |||
} | |||
else if (context & LC_FAIL_ON_LBRACE) { | |||
if (data == *"{") { | |||
if (data == *"{" || (Tokenizer_READ(self, -1) == *"{" && | |||
Tokenizer_READ(self, -2) == *"{")) { | |||
if (context & LC_TEMPLATE) | |||
self->topstack->context |= LC_FAIL_ON_EQUALS; | |||
else | |||
self->topstack->context |= LC_FAIL_NEXT; | |||
return; | |||
return 0; | |||
} | |||
self->topstack->context ^= LC_FAIL_ON_LBRACE; | |||
} | |||
@@ -1186,7 +1198,7 @@ Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) | |||
self->topstack->context |= LC_FAIL_ON_EQUALS; | |||
else | |||
self->topstack->context |= LC_FAIL_NEXT; | |||
return; | |||
return 0; | |||
} | |||
self->topstack->context ^= LC_FAIL_ON_RBRACE; | |||
} | |||
@@ -1195,47 +1207,7 @@ Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) | |||
else if (data == *"}") | |||
self->topstack->context |= LC_FAIL_ON_RBRACE; | |||
} | |||
if (context & LC_HAS_TEXT) { | |||
if (context & LC_FAIL_ON_TEXT) { | |||
if (!Py_UNICODE_ISSPACE(data)) { | |||
if (context & LC_TEMPLATE_PARAM_KEY) { | |||
self->topstack->context ^= LC_FAIL_ON_TEXT; | |||
self->topstack->context |= LC_FAIL_ON_EQUALS; | |||
} | |||
else | |||
Tokenizer_fail_route(self); | |||
return; | |||
} | |||
} | |||
else { | |||
if (data == *"\n") | |||
self->topstack->context |= LC_FAIL_ON_TEXT; | |||
} | |||
} | |||
else if (!Py_UNICODE_ISSPACE(data)) | |||
self->topstack->context |= LC_HAS_TEXT; | |||
} | |||
/* | |||
Unset any safety-checking contexts set by Tokenizer_verify_safe(). Used | |||
when we preserve a context but previous data becomes invalid, like when | |||
moving between template parameters. | |||
*/ | |||
static void | |||
Tokenizer_reset_safety_checks(Tokenizer* self) | |||
{ | |||
static int checks[] = { | |||
LC_HAS_TEXT, LC_FAIL_ON_TEXT, LC_FAIL_NEXT, LC_FAIL_ON_LBRACE, | |||
LC_FAIL_ON_RBRACE, LC_FAIL_ON_EQUALS, 0}; | |||
int context = self->topstack->context, i = 0, this; | |||
while (1) { | |||
this = checks[i]; | |||
if (!this) | |||
return; | |||
if (context & this) | |||
self->topstack->context ^= this; | |||
i++; | |||
} | |||
return 0; | |||
} | |||
/* | |||
@@ -1258,12 +1230,12 @@ Tokenizer_parse(Tokenizer* self, int context) | |||
this = Tokenizer_READ(self, 0); | |||
this_context = self->topstack->context; | |||
if (this_context & unsafe_contexts) { | |||
Tokenizer_verify_safe(self, this_context, this); | |||
if (BAD_ROUTE) { | |||
if (Tokenizer_verify_safe(self, this_context, this) < 0) { | |||
if (this_context & LC_TEMPLATE_PARAM_KEY) { | |||
trash = Tokenizer_pop(self); | |||
Py_XDECREF(trash); | |||
} | |||
Tokenizer_fail_route(self); | |||
return NULL; | |||
} | |||
} | |||
@@ -1303,7 +1275,6 @@ Tokenizer_parse(Tokenizer* self, int context) | |||
self->topstack->context ^= LC_FAIL_NEXT; | |||
} | |||
else if (this == *"|" && this_context & LC_TEMPLATE) { | |||
Tokenizer_reset_safety_checks(self); | |||
if (Tokenizer_handle_template_param(self)) | |||
return NULL; | |||
} | |||
@@ -1324,10 +1295,14 @@ Tokenizer_parse(Tokenizer* self, int context) | |||
Tokenizer_write_text(self, this); | |||
} | |||
else if (this == next && next == *"[") { | |||
if (Tokenizer_parse_wikilink(self)) | |||
return NULL; | |||
if (self->topstack->context & LC_FAIL_NEXT) | |||
self->topstack->context ^= LC_FAIL_NEXT; | |||
if (!(this_context & LC_WIKILINK_TITLE)) { | |||
if (Tokenizer_parse_wikilink(self)) | |||
return NULL; | |||
if (self->topstack->context & LC_FAIL_NEXT) | |||
self->topstack->context ^= LC_FAIL_NEXT; | |||
} | |||
else | |||
Tokenizer_write_text(self, this); | |||
} | |||
else if (this == *"|" && this_context & LC_WIKILINK_TITLE) { | |||
if (Tokenizer_handle_wikilink_separator(self)) | |||
@@ -1401,7 +1376,8 @@ Tokenizer_tokenize(Tokenizer* self, PyObject* args) | |||
PyMODINIT_FUNC | |||
init_tokenizer(void) | |||
{ | |||
PyObject *module, *tempmodule, *defmap, *deflist, *globals, *locals, *fromlist, *modname; | |||
PyObject *module, *tempmod, *defmap, *deflist, *globals, *locals, | |||
*fromlist, *modname; | |||
unsigned numdefs, i; | |||
char *name; | |||
@@ -1411,14 +1387,16 @@ init_tokenizer(void) | |||
module = Py_InitModule("_tokenizer", module_methods); | |||
Py_INCREF(&TokenizerType); | |||
PyModule_AddObject(module, "CTokenizer", (PyObject*) &TokenizerType); | |||
Py_INCREF(Py_True); | |||
PyDict_SetItemString(TokenizerType.tp_dict, "USES_C", Py_True); | |||
tempmodule = PyImport_ImportModule("htmlentitydefs"); | |||
if (!tempmodule) | |||
tempmod = PyImport_ImportModule("htmlentitydefs"); | |||
if (!tempmod) | |||
return; | |||
defmap = PyObject_GetAttrString(tempmodule, "entitydefs"); | |||
defmap = PyObject_GetAttrString(tempmod, "entitydefs"); | |||
if (!defmap) | |||
return; | |||
Py_DECREF(tempmodule); | |||
Py_DECREF(tempmod); | |||
deflist = PyDict_Keys(defmap); | |||
if (!deflist) | |||
return; | |||
@@ -1442,18 +1420,20 @@ init_tokenizer(void) | |||
if (!modname) | |||
return; | |||
PyList_SET_ITEM(fromlist, 0, modname); | |||
tempmodule = PyImport_ImportModuleLevel(name, globals, locals, fromlist, 0); | |||
tempmod = PyImport_ImportModuleLevel(name, globals, locals, fromlist, 0); | |||
Py_DECREF(fromlist); | |||
if (!tempmodule) | |||
if (!tempmod) | |||
return; | |||
tokens = PyObject_GetAttrString(tempmodule, "tokens"); | |||
Py_DECREF(tempmodule); | |||
tokens = PyObject_GetAttrString(tempmod, "tokens"); | |||
Py_DECREF(tempmod); | |||
Text = PyObject_GetAttrString(tokens, "Text"); | |||
TemplateOpen = PyObject_GetAttrString(tokens, "TemplateOpen"); | |||
TemplateParamSeparator = PyObject_GetAttrString(tokens, "TemplateParamSeparator"); | |||
TemplateParamEquals = PyObject_GetAttrString(tokens, "TemplateParamEquals"); | |||
TemplateParamSeparator = PyObject_GetAttrString(tokens, | |||
"TemplateParamSeparator"); | |||
TemplateParamEquals = PyObject_GetAttrString(tokens, | |||
"TemplateParamEquals"); | |||
TemplateClose = PyObject_GetAttrString(tokens, "TemplateClose"); | |||
ArgumentOpen = PyObject_GetAttrString(tokens, "ArgumentOpen"); | |||
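The ``USES_C`` attribute set on the type dictionary above (and defined as
``False`` on the pure-Python tokenizer later in this diff) gives a quick way to
tell which implementation is loaded; a sketch, assuming the extension was
compiled (Python 2 only once setup.py starts skipping it on 3)::

    from mwparserfromhell.parser._tokenizer import CTokenizer
    assert CTokenizer.USES_C is True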
@@ -1,6 +1,6 @@ | |||
/* | |||
Tokenizer Header File for MWParserFromHell | |||
Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
Permission is hereby granted, free of charge, to any person obtaining a copy of | |||
this software and associated documentation files (the "Software"), to deal in | |||
@@ -36,6 +36,10 @@ SOFTWARE. | |||
#define malloc PyObject_Malloc | |||
#define free PyObject_Free | |||
#define DIGITS "0123456789" | |||
#define HEXDIGITS "0123456789abcdefABCDEF" | |||
#define ALPHANUM "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" | |||
static const char* MARKERS[] = { | |||
"{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":", "/", "-", | |||
"!", "\n", ""}; | |||
@@ -118,6 +122,7 @@ static PyObject* TagCloseClose; | |||
#define LC_COMMENT 0x02000 | |||
#define LC_SAFETY_CHECK 0xFC000 | |||
#define LC_HAS_TEXT 0x04000 | |||
#define LC_FAIL_ON_TEXT 0x08000 | |||
#define LC_FAIL_NEXT 0x10000 | |||
@@ -205,8 +210,7 @@ static HeadingData* Tokenizer_handle_heading_end(Tokenizer*); | |||
static int Tokenizer_really_parse_entity(Tokenizer*); | |||
static int Tokenizer_parse_entity(Tokenizer*); | |||
static int Tokenizer_parse_comment(Tokenizer*); | |||
static void Tokenizer_verify_safe(Tokenizer*, int, Py_UNICODE); | |||
static void Tokenizer_reset_safety_checks(Tokenizer*); | |||
static int Tokenizer_verify_safe(Tokenizer*, int, Py_UNICODE); | |||
static PyObject* Tokenizer_parse(Tokenizer*, int); | |||
static PyObject* Tokenizer_tokenize(Tokenizer*, PyObject*); | |||
@@ -37,6 +37,7 @@ class BadRoute(Exception): | |||
class Tokenizer(object): | |||
"""Creates a list of tokens from a string of wikicode.""" | |||
USES_C = False | |||
START = object() | |||
END = object() | |||
MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":", | |||
@@ -212,28 +213,9 @@ class Tokenizer(object): | |||
self._write_all(argument) | |||
self._write(tokens.ArgumentClose()) | |||
def _verify_safe(self, unsafes, strip=True): | |||
"""Verify that there are no unsafe characters in the current stack. | |||
The route will be failed if the name contains any element of *unsafes* | |||
in it. This is used when parsing template names, parameter keys, and so | |||
on, which cannot contain newlines and some other characters. If *strip* | |||
is ``True``, the text will be stripped of whitespace, since this is | |||
allowed at the ends of certain elements but not between text. | |||
""" | |||
self._push_textbuffer() | |||
if self._stack: | |||
text = [tok for tok in self._stack if isinstance(tok, tokens.Text)] | |||
text = "".join([token.text for token in text]) | |||
if strip: | |||
text = text.strip() | |||
if text and any([unsafe in text for unsafe in unsafes]): | |||
self._fail_route() | |||
def _handle_template_param(self): | |||
"""Handle a template parameter at the head of the string.""" | |||
if self._context & contexts.TEMPLATE_NAME: | |||
self._verify_safe(["\n", "{", "}", "[", "]"]) | |||
self._context ^= contexts.TEMPLATE_NAME | |||
elif self._context & contexts.TEMPLATE_PARAM_VALUE: | |||
self._context ^= contexts.TEMPLATE_PARAM_VALUE | |||
@@ -245,11 +227,6 @@ class Tokenizer(object): | |||
def _handle_template_param_value(self): | |||
"""Handle a template parameter's value at the head of the string.""" | |||
try: | |||
self._verify_safe(["\n", "{{", "}}"]) | |||
except BadRoute: | |||
self._pop() | |||
raise | |||
self._write_all(self._pop(keep_context=True)) | |||
self._context ^= contexts.TEMPLATE_PARAM_KEY | |||
self._context |= contexts.TEMPLATE_PARAM_VALUE | |||
@@ -257,24 +234,19 @@ class Tokenizer(object): | |||
def _handle_template_end(self): | |||
"""Handle the end of a template at the head of the string.""" | |||
if self._context & contexts.TEMPLATE_NAME: | |||
self._verify_safe(["\n", "{", "}", "[", "]"]) | |||
elif self._context & contexts.TEMPLATE_PARAM_KEY: | |||
if self._context & contexts.TEMPLATE_PARAM_KEY: | |||
self._write_all(self._pop(keep_context=True)) | |||
self._head += 1 | |||
return self._pop() | |||
def _handle_argument_separator(self): | |||
"""Handle the separator between an argument's name and default.""" | |||
self._verify_safe(["\n", "{{", "}}"]) | |||
self._context ^= contexts.ARGUMENT_NAME | |||
self._context |= contexts.ARGUMENT_DEFAULT | |||
self._write(tokens.ArgumentSeparator()) | |||
def _handle_argument_end(self): | |||
"""Handle the end of an argument at the head of the string.""" | |||
if self._context & contexts.ARGUMENT_NAME: | |||
self._verify_safe(["\n", "{{", "}}"]) | |||
self._head += 2 | |||
return self._pop() | |||
@@ -294,15 +266,12 @@ class Tokenizer(object): | |||
def _handle_wikilink_separator(self): | |||
"""Handle the separator between a wikilink's title and its text.""" | |||
self._verify_safe(["\n", "{", "}", "[", "]"], strip=False) | |||
self._context ^= contexts.WIKILINK_TITLE | |||
self._context |= contexts.WIKILINK_TEXT | |||
self._write(tokens.WikilinkSeparator()) | |||
def _handle_wikilink_end(self): | |||
"""Handle the end of a wikilink at the head of the string.""" | |||
if self._context & contexts.WIKILINK_TITLE: | |||
self._verify_safe(["\n", "{", "}", "[", "]"], strip=False) | |||
self._head += 1 | |||
return self._pop() | |||
@@ -423,11 +392,73 @@ class Tokenizer(object): | |||
self._write(tokens.CommentEnd()) | |||
self._head += 2 | |||
def _verify_safe(self, this): | |||
"""Make sure we are not trying to write an invalid character.""" | |||
context = self._context | |||
if context & contexts.FAIL_NEXT: | |||
return False | |||
if context & contexts.WIKILINK_TITLE: | |||
if this == "]" or this == "{": | |||
self._context |= contexts.FAIL_NEXT | |||
elif this == "\n" or this == "[" or this == "}": | |||
return False | |||
return True | |||
if context & contexts.TEMPLATE_NAME: | |||
if this == "{" or this == "}" or this == "[": | |||
self._context |= contexts.FAIL_NEXT | |||
return True | |||
if this == "]": | |||
return False | |||
if this == "|": | |||
return True | |||
if context & contexts.HAS_TEXT: | |||
if context & contexts.FAIL_ON_TEXT: | |||
if this is self.END or not this.isspace(): | |||
return False | |||
else: | |||
if this == "\n": | |||
self._context |= contexts.FAIL_ON_TEXT | |||
elif this is self.END or not this.isspace(): | |||
self._context |= contexts.HAS_TEXT | |||
return True | |||
else: | |||
if context & contexts.FAIL_ON_EQUALS: | |||
if this == "=": | |||
return False | |||
elif context & contexts.FAIL_ON_LBRACE: | |||
if this == "{" or (self._read(-1) == self._read(-2) == "{"): | |||
if context & contexts.TEMPLATE: | |||
self._context |= contexts.FAIL_ON_EQUALS | |||
else: | |||
self._context |= contexts.FAIL_NEXT | |||
return True | |||
self._context ^= contexts.FAIL_ON_LBRACE | |||
elif context & contexts.FAIL_ON_RBRACE: | |||
if this == "}": | |||
if context & contexts.TEMPLATE: | |||
self._context |= contexts.FAIL_ON_EQUALS | |||
else: | |||
self._context |= contexts.FAIL_NEXT | |||
return True | |||
self._context ^= contexts.FAIL_ON_RBRACE | |||
elif this == "{": | |||
self._context |= contexts.FAIL_ON_LBRACE | |||
elif this == "}": | |||
self._context |= contexts.FAIL_ON_RBRACE | |||
return True | |||
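The net effect of these checks: spaces are fine inside a template name, but
text on both sides of a newline trips ``FAIL_ON_TEXT`` and fails the route, so
the braces are written back as plain text. A rough illustration of the
expected behaviour (not an exhaustive test)::

    import mwparserfromhell

    mwparserfromhell.parse("{{foo bar}}")   # yields one Template node
    mwparserfromhell.parse("{{foo\nbar}}")  # no Template; kept as plain text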
def _parse(self, context=0): | |||
"""Parse the wikicode string, using *context* for when to stop.""" | |||
self._push(context) | |||
while True: | |||
this = self._read() | |||
unsafe = (contexts.TEMPLATE_NAME | contexts.WIKILINK_TITLE | | |||
contexts.TEMPLATE_PARAM_KEY | contexts.ARGUMENT_NAME) | |||
if self._context & unsafe: | |||
if not self._verify_safe(this): | |||
if self._context & contexts.TEMPLATE_PARAM_KEY: | |||
self._pop() | |||
self._fail_route() | |||
if this not in self.MARKERS: | |||
self._write_text(this) | |||
self._head += 1 | |||
@@ -449,6 +480,8 @@ class Tokenizer(object): | |||
self._write_text(this) | |||
elif this == next == "{": | |||
self._parse_template_or_argument() | |||
if self._context & contexts.FAIL_NEXT: | |||
self._context ^= contexts.FAIL_NEXT | |||
elif this == "|" and self._context & contexts.TEMPLATE: | |||
self._handle_template_param() | |||
elif this == "=" and self._context & contexts.TEMPLATE_PARAM_KEY: | |||
@@ -465,6 +498,8 @@ class Tokenizer(object): | |||
elif this == next == "[": | |||
if not self._context & contexts.WIKILINK_TITLE: | |||
self._parse_wikilink() | |||
if self._context & contexts.FAIL_NEXT: | |||
self._context ^= contexts.FAIL_NEXT | |||
else: | |||
self._write_text("[") | |||
elif this == "|" and self._context & contexts.WIKILINK_TITLE: | |||
@@ -41,8 +41,23 @@ def inheritdoc(method): | |||
method.__doc__ = getattr(list, method.__name__).__doc__ | |||
return method | |||
class _SliceNormalizerMixIn(object): | |||
"""MixIn that provides a private method to normalize slices.""" | |||
class SmartList(list): | |||
def _normalize_slice(self, key): | |||
"""Return a slice equivalent to the input *key*, standardized.""" | |||
if key.start is not None: | |||
start = (len(self) + key.start) if key.start < 0 else key.start | |||
else: | |||
start = 0 | |||
if key.stop is not None: | |||
stop = (len(self) + key.stop) if key.stop < 0 else key.stop | |||
else: | |||
stop = maxsize | |||
return slice(start, stop, key.step or 1) | |||
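Concretely, negative and open-ended bounds are rewritten into absolute ones,
with ``maxsize`` (from ``compat``) standing in for "up to the end". A rough
illustration, not part of the change itself::

    lst = SmartList([0, 1, 2, 3, 4])
    key = lst._normalize_slice(slice(-2, None))
    # key == slice(3, maxsize, 1): start is resolved against len(lst) and an
    # open stop becomes maxsize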
class SmartList(_SliceNormalizerMixIn, list): | |||
"""Implements the ``list`` interface with special handling of sublists. | |||
When a sublist is created (by ``list[i:j]``), any changes made to this | |||
@@ -76,7 +91,8 @@ class SmartList(list): | |||
def __getitem__(self, key): | |||
if not isinstance(key, slice): | |||
return super(SmartList, self).__getitem__(key) | |||
sliceinfo = [key.start or 0, key.stop or 0, key.step or 1] | |||
key = self._normalize_slice(key) | |||
sliceinfo = [key.start, key.stop, key.step] | |||
child = _ListProxy(self, sliceinfo) | |||
self._children[id(child)] = (child, sliceinfo) | |||
return child | |||
@@ -86,25 +102,28 @@ class SmartList(list): | |||
return super(SmartList, self).__setitem__(key, item) | |||
item = list(item) | |||
super(SmartList, self).__setitem__(key, item) | |||
diff = len(item) - key.stop + key.start | |||
key = self._normalize_slice(key) | |||
diff = len(item) + (key.start - key.stop) // key.step | |||
values = self._children.values if py3k else self._children.itervalues | |||
if diff: | |||
for child, (start, stop, step) in values(): | |||
if start >= key.stop: | |||
if start > key.stop: | |||
self._children[id(child)][1][0] += diff | |||
if stop >= key.stop and stop != maxsize: | |||
self._children[id(child)][1][1] += diff | |||
def __delitem__(self, key): | |||
super(SmartList, self).__delitem__(key) | |||
if not isinstance(key, slice): | |||
key = slice(key, key + 1) | |||
diff = key.stop - key.start | |||
if isinstance(key, slice): | |||
key = self._normalize_slice(key) | |||
else: | |||
key = slice(key, key + 1, 1) | |||
diff = (key.stop - key.start) // key.step | |||
values = self._children.values if py3k else self._children.itervalues | |||
for child, (start, stop, step) in values(): | |||
if start > key.start: | |||
self._children[id(child)][1][0] -= diff | |||
if stop >= key.stop: | |||
if stop >= key.stop and stop != maxsize: | |||
self._children[id(child)][1][1] -= diff | |||
if not py3k: | |||
@@ -160,24 +179,35 @@ class SmartList(list): | |||
child._parent = copy | |||
super(SmartList, self).reverse() | |||
@inheritdoc | |||
def sort(self, cmp=None, key=None, reverse=None): | |||
copy = list(self) | |||
for child in self._children: | |||
child._parent = copy | |||
if cmp is not None: | |||
if py3k: | |||
@inheritdoc | |||
def sort(self, key=None, reverse=None): | |||
copy = list(self) | |||
for child in self._children: | |||
child._parent = copy | |||
kwargs = {} | |||
if key is not None: | |||
if reverse is not None: | |||
super(SmartList, self).sort(cmp, key, reverse) | |||
else: | |||
super(SmartList, self).sort(cmp, key) | |||
else: | |||
super(SmartList, self).sort(cmp) | |||
else: | |||
super(SmartList, self).sort() | |||
kwargs["key"] = key | |||
if reverse is not None: | |||
kwargs["reverse"] = reverse | |||
super(SmartList, self).sort(**kwargs) | |||
else: | |||
@inheritdoc | |||
def sort(self, cmp=None, key=None, reverse=None): | |||
copy = list(self) | |||
for child in self._children: | |||
child._parent = copy | |||
kwargs = {} | |||
if cmp is not None: | |||
kwargs["cmp"] = cmp | |||
if key is not None: | |||
kwargs["key"] = key | |||
if reverse is not None: | |||
kwargs["reverse"] = reverse | |||
super(SmartList, self).sort(**kwargs) | |||
class _ListProxy(list): | |||
class _ListProxy(_SliceNormalizerMixIn, list): | |||
"""Implement the ``list`` interface by getting elements from a parent. | |||
This is created by a :py:class:`~.SmartList` object when slicing. It does | |||
@@ -231,25 +261,52 @@ class _ListProxy(list): | |||
return bool(self._render()) | |||
def __len__(self): | |||
return (self._stop - self._start) / self._step | |||
return (self._stop - self._start) // self._step | |||
def __getitem__(self, key): | |||
return self._render()[key] | |||
if isinstance(key, slice): | |||
key = self._normalize_slice(key) | |||
if key.stop == maxsize: | |||
keystop = self._stop | |||
else: | |||
keystop = key.stop + self._start | |||
adjusted = slice(key.start + self._start, keystop, key.step) | |||
return self._parent[adjusted] | |||
else: | |||
return self._render()[key] | |||
def __setitem__(self, key, item): | |||
if isinstance(key, slice): | |||
adjusted = slice(key.start + self._start, key.stop + self._stop, | |||
key.step) | |||
key = self._normalize_slice(key) | |||
if key.stop == maxsize: | |||
keystop = self._stop | |||
else: | |||
keystop = key.stop + self._start | |||
adjusted = slice(key.start + self._start, keystop, key.step) | |||
self._parent[adjusted] = item | |||
else: | |||
length = len(self) | |||
if key < 0: | |||
key = length + key | |||
if key < 0 or key >= length: | |||
raise IndexError("list assignment index out of range") | |||
self._parent[self._start + key] = item | |||
def __delitem__(self, key): | |||
if isinstance(key, slice): | |||
adjusted = slice(key.start + self._start, key.stop + self._stop, | |||
key.step) | |||
key = self._normalize_slice(key) | |||
if key.stop == maxsize: | |||
keystop = self._stop | |||
else: | |||
keystop = key.stop + self._start | |||
adjusted = slice(key.start + self._start, keystop, key.step) | |||
del self._parent[adjusted] | |||
else: | |||
length = len(self) | |||
if key < 0: | |||
key = length + key | |||
if key < 0 or key >= length: | |||
raise IndexError("list assignment index out of range") | |||
del self._parent[self._start + key] | |||
def __iter__(self): | |||
@@ -287,6 +344,16 @@ class _ListProxy(list): | |||
self.extend(other) | |||
return self | |||
def __mul__(self, other): | |||
return SmartList(list(self) * other) | |||
def __rmul__(self, other): | |||
return SmartList(other * list(self)) | |||
def __imul__(self, other): | |||
self.extend(list(self) * (other - 1)) | |||
return self | |||
@property | |||
def _start(self): | |||
"""The starting index of this list, inclusive.""" | |||
@@ -295,6 +362,8 @@ class _ListProxy(list): | |||
@property | |||
def _stop(self): | |||
"""The ending index of this list, exclusive.""" | |||
if self._sliceinfo[1] == maxsize: | |||
return len(self._parent) | |||
return self._sliceinfo[1] | |||
@property | |||
@@ -328,18 +397,25 @@ class _ListProxy(list): | |||
@inheritdoc | |||
def insert(self, index, item): | |||
if index < 0: | |||
index = len(self) + index | |||
self._parent.insert(self._start + index, item) | |||
@inheritdoc | |||
def pop(self, index=None): | |||
length = len(self) | |||
if index is None: | |||
index = len(self) - 1 | |||
index = length - 1 | |||
elif index < 0: | |||
index = length + index | |||
if index < 0 or index >= length: | |||
raise IndexError("pop index out of range") | |||
return self._parent.pop(self._start + index) | |||
@inheritdoc | |||
def remove(self, item): | |||
index = self.index(item) | |||
del self._parent[index] | |||
del self._parent[self._start + index] | |||
@inheritdoc | |||
def reverse(self): | |||
@@ -347,17 +423,30 @@ class _ListProxy(list): | |||
item.reverse() | |||
self._parent[self._start:self._stop:self._step] = item | |||
@inheritdoc | |||
def sort(self, cmp=None, key=None, reverse=None): | |||
item = self._render() | |||
if cmp is not None: | |||
if py3k: | |||
@inheritdoc | |||
def sort(self, key=None, reverse=None): | |||
item = self._render() | |||
kwargs = {} | |||
if key is not None: | |||
if reverse is not None: | |||
item.sort(cmp, key, reverse) | |||
else: | |||
item.sort(cmp, key) | |||
else: | |||
item.sort(cmp) | |||
else: | |||
item.sort() | |||
self._parent[self._start:self._stop:self._step] = item | |||
kwargs["key"] = key | |||
if reverse is not None: | |||
kwargs["reverse"] = reverse | |||
item.sort(**kwargs) | |||
self._parent[self._start:self._stop:self._step] = item | |||
else: | |||
@inheritdoc | |||
def sort(self, cmp=None, key=None, reverse=None): | |||
item = self._render() | |||
kwargs = {} | |||
if cmp is not None: | |||
kwargs["cmp"] = cmp | |||
if key is not None: | |||
kwargs["key"] = key | |||
if reverse is not None: | |||
kwargs["reverse"] = reverse | |||
item.sort(**kwargs) | |||
self._parent[self._start:self._stop:self._step] = item | |||
del inheritdoc |
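For context, the point of all this slice bookkeeping is that a ``_ListProxy``
returned by slicing stays live: changes made through either the parent or the
sublist are visible from both sides. A short sketch of the intended behaviour
(the values are arbitrary)::

    parent = SmartList([0, 1, 2, 3])
    child = parent[2:]       # a _ListProxy covering index 2 through the end
    child.append(4)          # routed through the parent
    # parent == [0, 1, 2, 3, 4]; child == [2, 3, 4]
    parent.append(5)
    # the open-ended child sees it too: child == [2, 3, 4, 5]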
@@ -114,6 +114,9 @@ class StringMixIn(object): | |||
def __getitem__(self, key): | |||
return self.__unicode__()[key] | |||
def __reversed__(self): | |||
return reversed(self.__unicode__()) | |||
def __contains__(self, item): | |||
if isinstance(item, StringMixIn): | |||
return str(item) in self.__unicode__() | |||
@@ -123,6 +126,11 @@ class StringMixIn(object): | |||
def capitalize(self): | |||
return self.__unicode__().capitalize() | |||
if py3k: | |||
@inheritdoc | |||
def casefold(self): | |||
return self.__unicode__().casefold() | |||
@inheritdoc | |||
def center(self, width, fillchar=None): | |||
if fillchar is None: | |||
@@ -136,19 +144,21 @@ class StringMixIn(object): | |||
if not py3k: | |||
@inheritdoc | |||
def decode(self, encoding=None, errors=None): | |||
if errors is None: | |||
if encoding is None: | |||
return self.__unicode__().decode() | |||
return self.__unicode__().decode(encoding) | |||
return self.__unicode__().decode(encoding, errors) | |||
kwargs = {} | |||
if encoding is not None: | |||
kwargs["encoding"] = encoding | |||
if errors is not None: | |||
kwargs["errors"] = errors | |||
return self.__unicode__().decode(**kwargs) | |||
@inheritdoc | |||
def encode(self, encoding=None, errors=None): | |||
if errors is None: | |||
if encoding is None: | |||
return self.__unicode__().encode() | |||
return self.__unicode__().encode(encoding) | |||
return self.__unicode__().encode(encoding, errors) | |||
kwargs = {} | |||
if encoding is not None: | |||
kwargs["encoding"] = encoding | |||
if errors is not None: | |||
kwargs["errors"] = errors | |||
return self.__unicode__().encode(**kwargs) | |||
@inheritdoc | |||
def endswith(self, prefix, start=None, end=None): | |||
@@ -168,6 +178,11 @@ class StringMixIn(object): | |||
def format(self, *args, **kwargs): | |||
return self.__unicode__().format(*args, **kwargs) | |||
if py3k: | |||
@inheritdoc | |||
def format_map(self, mapping): | |||
return self.__unicode__().format_map(mapping) | |||
@inheritdoc | |||
def index(self, sub, start=None, end=None): | |||
return self.__unicode__().index(sub, start, end) | |||
@@ -188,6 +203,11 @@ class StringMixIn(object): | |||
def isdigit(self): | |||
return self.__unicode__().isdigit() | |||
if py3k: | |||
@inheritdoc | |||
def isidentifier(self): | |||
return self.__unicode__().isidentifier() | |||
@inheritdoc | |||
def islower(self): | |||
return self.__unicode__().islower() | |||
@@ -196,6 +216,11 @@ class StringMixIn(object): | |||
def isnumeric(self): | |||
return self.__unicode__().isnumeric() | |||
if py3k: | |||
@inheritdoc | |||
def isprintable(self): | |||
return self.__unicode__().isprintable() | |||
@inheritdoc | |||
def isspace(self): | |||
return self.__unicode__().isspace() | |||
@@ -226,12 +251,24 @@ class StringMixIn(object): | |||
def lstrip(self, chars=None): | |||
return self.__unicode__().lstrip(chars) | |||
if py3k: | |||
@inheritdoc | |||
def maketrans(self, x, y=None, z=None): | |||
if z is None: | |||
if y is None: | |||
return self.__unicode__().maketrans(x) | |||
return self.__unicode__().maketrans(x, y) | |||
return self.__unicode__().maketrans(x, y, z) | |||
@inheritdoc | |||
def partition(self, sep): | |||
return self.__unicode__().partition(sep) | |||
@inheritdoc | |||
def replace(self, old, new, count): | |||
def replace(self, old, new, count=None): | |||
if count is None: | |||
return self.__unicode__().replace(old, new) | |||
return self.__unicode__().replace(old, new, count) | |||
@inheritdoc | |||
@@ -252,25 +289,45 @@ class StringMixIn(object): | |||
def rpartition(self, sep): | |||
return self.__unicode__().rpartition(sep) | |||
@inheritdoc | |||
def rsplit(self, sep=None, maxsplit=None): | |||
if maxsplit is None: | |||
if sep is None: | |||
return self.__unicode__().rsplit() | |||
return self.__unicode__().rsplit(sep) | |||
return self.__unicode__().rsplit(sep, maxsplit) | |||
if py3k: | |||
@inheritdoc | |||
def rsplit(self, sep=None, maxsplit=None): | |||
kwargs = {} | |||
if sep is not None: | |||
kwargs["sep"] = sep | |||
if maxsplit is not None: | |||
kwargs["maxsplit"] = maxsplit | |||
return self.__unicode__().rsplit(**kwargs) | |||
else: | |||
@inheritdoc | |||
def rsplit(self, sep=None, maxsplit=None): | |||
if maxsplit is None: | |||
if sep is None: | |||
return self.__unicode__().rsplit() | |||
return self.__unicode__().rsplit(sep) | |||
return self.__unicode__().rsplit(sep, maxsplit) | |||
@inheritdoc | |||
def rstrip(self, chars=None): | |||
return self.__unicode__().rstrip(chars) | |||
@inheritdoc | |||
def split(self, sep=None, maxsplit=None): | |||
if maxsplit is None: | |||
if sep is None: | |||
return self.__unicode__().split() | |||
return self.__unicode__().split(sep) | |||
return self.__unicode__().split(sep, maxsplit) | |||
if py3k: | |||
@inheritdoc | |||
def split(self, sep=None, maxsplit=None): | |||
kwargs = {} | |||
if sep is not None: | |||
kwargs["sep"] = sep | |||
if maxsplit is not None: | |||
kwargs["maxsplit"] = maxsplit | |||
return self.__unicode__().split(**kwargs) | |||
else: | |||
@inheritdoc | |||
def split(self, sep=None, maxsplit=None): | |||
if maxsplit is None: | |||
if sep is None: | |||
return self.__unicode__().split() | |||
return self.__unicode__().split(sep) | |||
return self.__unicode__().split(sep, maxsplit) | |||
@inheritdoc | |||
def splitlines(self, keepends=None): | |||
@@ -24,6 +24,7 @@ | |||
from setuptools import setup, find_packages, Extension | |||
from mwparserfromhell import __version__ | |||
from mwparserfromhell.compat import py3k | |||
with open("README.rst") as fp: | |||
long_docs = fp.read() | |||
@@ -37,7 +38,7 @@ tokenizer = Extension("mwparserfromhell.parser._tokenizer", | |||
setup( | |||
name = "mwparserfromhell", | |||
packages = find_packages(exclude=("tests",)), | |||
ext_modules = [tokenizer], | |||
ext_modules = [] if py3k else [tokenizer], | |||
test_suite = "tests", | |||
version = __version__, | |||
author = "Ben Kurtovic", | |||
@@ -0,0 +1,130 @@ | |||
<?xml version="1.0" encoding="UTF-8"?> | |||
<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> | |||
<plist version="1.0"> | |||
<dict> | |||
<key>fileTypes</key> | |||
<array> | |||
<string>mwtest</string> | |||
</array> | |||
<key>name</key> | |||
<string>MWParserFromHell Test Case</string> | |||
<key>patterns</key> | |||
<array> | |||
<dict> | |||
<key>match</key> | |||
<string>---</string> | |||
<key>name</key> | |||
<string>markup.heading.divider.mwpfh</string> | |||
</dict> | |||
<dict> | |||
<key>captures</key> | |||
<dict> | |||
<key>1</key> | |||
<dict> | |||
<key>name</key> | |||
<string>keyword.other.name.mwpfh</string> | |||
</dict> | |||
<key>2</key> | |||
<dict> | |||
<key>name</key> | |||
<string>variable.other.name.mwpfh</string> | |||
</dict> | |||
</dict> | |||
<key>match</key> | |||
<string>(name:)\s*(\w*)</string> | |||
<key>name</key> | |||
<string>meta.name.mwpfh</string> | |||
</dict> | |||
<dict> | |||
<key>captures</key> | |||
<dict> | |||
<key>1</key> | |||
<dict> | |||
<key>name</key> | |||
<string>keyword.other.label.mwpfh</string> | |||
</dict> | |||
<key>2</key> | |||
<dict> | |||
<key>name</key> | |||
<string>comment.line.other.label.mwpfh</string> | |||
</dict> | |||
</dict> | |||
<key>match</key> | |||
<string>(label:)\s*(.*)</string> | |||
<key>name</key> | |||
<string>meta.label.mwpfh</string> | |||
</dict> | |||
<dict> | |||
<key>captures</key> | |||
<dict> | |||
<key>1</key> | |||
<dict> | |||
<key>name</key> | |||
<string>keyword.other.input.mwpfh</string> | |||
</dict> | |||
<key>2</key> | |||
<dict> | |||
<key>name</key> | |||
<string>string.quoted.double.input.mwpfh</string> | |||
</dict> | |||
</dict> | |||
<key>match</key> | |||
<string>(input:)\s*(.*)</string> | |||
<key>name</key> | |||
<string>meta.input.mwpfh</string> | |||
</dict> | |||
<dict> | |||
<key>captures</key> | |||
<dict> | |||
<key>1</key> | |||
<dict> | |||
<key>name</key> | |||
<string>keyword.other.output.mwpfh</string> | |||
</dict> | |||
</dict> | |||
<key>match</key> | |||
<string>(output:)</string> | |||
<key>name</key> | |||
<string>meta.output.mwpfh</string> | |||
</dict> | |||
<dict> | |||
<key>captures</key> | |||
<dict> | |||
<key>1</key> | |||
<dict> | |||
<key>name</key> | |||
<string>support.language.token.mwpfh</string> | |||
</dict> | |||
</dict> | |||
<key>match</key> | |||
<string>(\w+)\s*\(</string> | |||
<key>name</key> | |||
<string>meta.name.token.mwpfh</string> | |||
</dict> | |||
<dict> | |||
<key>captures</key> | |||
<dict> | |||
<key>1</key> | |||
<dict> | |||
<key>name</key> | |||
<string>variable.parameter.token.mwpfh</string> | |||
</dict> | |||
</dict> | |||
<key>match</key> | |||
<string>(\w+)\s*(=)</string> | |||
<key>name</key> | |||
<string>meta.name.parameter.token.mwpfh</string> | |||
</dict> | |||
<dict> | |||
<key>match</key> | |||
<string>".*?"</string> | |||
<key>name</key> | |||
<string>string.quoted.double.mwpfh</string> | |||
</dict> | |||
</array> | |||
<key>scopeName</key> | |||
<string>text.mwpfh</string> | |||
<key>uuid</key> | |||
<string>cd3e2ffa-a57d-4c40-954f-1a2e87ffd638</string> | |||
</dict> | |||
</plist> |
@@ -0,0 +1,121 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import print_function, unicode_literals | |||
from os import listdir, path | |||
from mwparserfromhell.compat import py3k | |||
from mwparserfromhell.parser import tokens | |||
class _TestParseError(Exception): | |||
"""Raised internally when a test could not be parsed.""" | |||
pass | |||
class TokenizerTestCase(object): | |||
"""A base test case for tokenizers, whose tests are loaded dynamically. | |||
Subclassed along with unittest.TestCase to form TestPyTokenizer and | |||
TestCTokenizer. Tests are loaded dynamically from files in the 'tokenizer' | |||
directory. | |||
""" | |||
@classmethod | |||
def _build_test_method(cls, funcname, data): | |||
"""Create and return a method to be treated as a test case method. | |||
*data* is a dict containing multiple keys: the *input* text to be | |||
tokenized, the expected list of tokens as *output*, and an optional | |||
*label* for the method's docstring. | |||
""" | |||
def inner(self): | |||
expected = data["output"] | |||
actual = self.tokenizer().tokenize(data["input"]) | |||
self.assertEqual(expected, actual) | |||
if not py3k: | |||
inner.__name__ = funcname.encode("utf8") | |||
inner.__doc__ = data["label"] | |||
return inner | |||
@classmethod | |||
def _load_tests(cls, filename, text): | |||
"""Load all tests in *text* from the file *filename*.""" | |||
tests = text.split("\n---\n") | |||
counter = 1 | |||
digits = len(str(len(tests))) | |||
for test in tests: | |||
data = {"name": None, "label": None, "input": None, "output": None} | |||
try: | |||
for line in test.strip().splitlines(): | |||
if line.startswith("name:"): | |||
data["name"] = line[len("name:"):].strip() | |||
elif line.startswith("label:"): | |||
data["label"] = line[len("label:"):].strip() | |||
elif line.startswith("input:"): | |||
raw = line[len("input:"):].strip() | |||
if raw[0] == '"' and raw[-1] == '"': | |||
raw = raw[1:-1] | |||
raw = raw.encode("raw_unicode_escape") | |||
data["input"] = raw.decode("unicode_escape") | |||
elif line.startswith("output:"): | |||
raw = line[len("output:"):].strip() | |||
try: | |||
data["output"] = eval(raw, vars(tokens)) | |||
except Exception as err: | |||
raise _TestParseError(err) | |||
except _TestParseError as err: | |||
if data["name"]: | |||
error = "Could not parse test '{0}' in '{1}':\n\t{2}" | |||
print(error.format(data["name"], filename, err)) | |||
else: | |||
error = "Could not parse a test in '{0}':\n\t{1}" | |||
print(error.format(filename, err)) | |||
continue | |||
if not data["name"]: | |||
error = "A test in '{0}' was ignored because it lacked a name" | |||
print(error.format(filename)) | |||
continue | |||
if data["input"] is None or data["output"] is None: | |||
error = "Test '{0}' in '{1}' was ignored because it lacked an input or an output" | |||
print(error.format(data["name"], filename)) | |||
continue | |||
number = str(counter).zfill(digits) | |||
fname = "test_{0}{1}_{2}".format(filename, number, data["name"]) | |||
meth = cls._build_test_method(fname, data) | |||
setattr(cls, fname, meth) | |||
counter += 1 | |||
@classmethod | |||
def build(cls): | |||
"""Load and install all tests from the 'tokenizer' directory.""" | |||
directory = path.join(path.dirname(__file__), "tokenizer") | |||
extension = ".mwtest" | |||
for filename in listdir(directory): | |||
if not filename.endswith(extension): | |||
continue | |||
with open(path.join(directory, filename), "r") as fp: | |||
text = fp.read() | |||
if not py3k: | |||
text = text.decode("utf8") | |||
cls._load_tests(filename[:0-len(extension)], text) | |||
TokenizerTestCase.build() |
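The ``.mwtest`` files that ``build()`` discovers use the keyword format parsed
above, with individual cases separated by ``---`` lines. A hypothetical case
might look like::

    name:   basic_template
    label:  a bare template with no parameters
    input:  "{{foo}}"
    output: [TemplateOpen(), Text(text="foo"), TemplateClose()]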
@@ -0,0 +1,113 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
from unittest import TestCase | |||
from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, | |||
Tag, Template, Text, Wikilink) | |||
from mwparserfromhell.nodes.extras import Attribute, Parameter | |||
from mwparserfromhell.wikicode import Wikicode | |||
class TreeEqualityTestCase(TestCase): | |||
"""A base test case with support for comparing the equality of node trees. | |||
This adds a number of type equality functions, for Wikicode objects and for | |||
each node type (Argument, Comment, Heading, HTMLEntity, Tag, Template, Text, | |||
and Wikilink). | |||
""" | |||
def assertNodeEqual(self, expected, actual): | |||
"""Assert that two Nodes have the same type and have the same data.""" | |||
registry = { | |||
Argument: self.assertArgumentNodeEqual, | |||
Comment: self.assertCommentNodeEqual, | |||
Heading: self.assertHeadingNodeEqual, | |||
HTMLEntity: self.assertHTMLEntityNodeEqual, | |||
Tag: self.assertTagNodeEqual, | |||
Template: self.assertTemplateNodeEqual, | |||
Text: self.assertTextNodeEqual, | |||
Wikilink: self.assertWikilinkNodeEqual | |||
} | |||
for nodetype in registry: | |||
if isinstance(expected, nodetype): | |||
self.assertIsInstance(actual, nodetype) | |||
registry[nodetype](expected, actual) | |||
def assertArgumentNodeEqual(self, expected, actual): | |||
"""Assert that two Argument nodes have the same data.""" | |||
self.assertWikicodeEqual(expected.name, actual.name) | |||
if expected.default is not None: | |||
self.assertWikicodeEqual(expected.default, actual.default) | |||
else: | |||
self.assertIs(None, actual.default) | |||
def assertCommentNodeEqual(self, expected, actual): | |||
"""Assert that two Comment nodes have the same data.""" | |||
self.assertWikicodeEqual(expected.contents, actual.contents) | |||
def assertHeadingNodeEqual(self, expected, actual): | |||
"""Assert that two Heading nodes have the same data.""" | |||
self.assertWikicodeEqual(expected.title, actual.title) | |||
self.assertEqual(expected.level, actual.level) | |||
def assertHTMLEntityNodeEqual(self, expected, actual): | |||
"""Assert that two HTMLEntity nodes have the same data.""" | |||
self.assertEqual(expected.value, actual.value) | |||
self.assertIs(expected.named, actual.named) | |||
self.assertIs(expected.hexadecimal, actual.hexadecimal) | |||
self.assertEqual(expected.hex_char, actual.hex_char) | |||
def assertTagNodeEqual(self, expected, actual): | |||
"""Assert that two Tag nodes have the same data.""" | |||
self.fail("Holding this until feature/html_tags is ready.") | |||
def assertTemplateNodeEqual(self, expected, actual): | |||
"""Assert that two Template nodes have the same data.""" | |||
self.assertWikicodeEqual(expected.name, actual.name) | |||
length = len(expected.params) | |||
self.assertEqual(length, len(actual.params)) | |||
for i in range(length): | |||
exp_param = expected.params[i] | |||
act_param = actual.params[i] | |||
self.assertWikicodeEqual(exp_param.name, act_param.name) | |||
self.assertWikicodeEqual(exp_param.value, act_param.value) | |||
self.assertIs(exp_param.showkey, act_param.showkey) | |||
def assertTextNodeEqual(self, expected, actual): | |||
"""Assert that two Text nodes have the same data.""" | |||
self.assertEqual(expected.value, actual.value) | |||
def assertWikilinkNodeEqual(self, expected, actual): | |||
"""Assert that two Wikilink nodes have the same data.""" | |||
self.assertWikicodeEqual(expected.title, actual.title) | |||
if expected.text is not None: | |||
self.assertWikicodeEqual(expected.text, actual.text) | |||
else: | |||
self.assertIs(None, actual.text) | |||
def assertWikicodeEqual(self, expected, actual): | |||
"""Assert that two Wikicode objects have the same data.""" | |||
self.assertIsInstance(actual, Wikicode) | |||
length = len(expected.nodes) | |||
self.assertEqual(length, len(actual.nodes)) | |||
for i in range(length): | |||
self.assertNodeEqual(expected.get(i), actual.get(i)) |
@@ -0,0 +1,20 @@ | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Serves the same purpose as mwparserfromhell.compat, but only for objects | |||
required by unit tests. This avoids unnecessary imports (like urllib) within | |||
the main library. | |||
""" | |||
from mwparserfromhell.compat import py3k | |||
if py3k: | |||
range = range | |||
from io import StringIO | |||
from urllib.parse import urlencode | |||
from urllib.request import urlopen | |||
else: | |||
range = xrange | |||
from StringIO import StringIO | |||
from urllib import urlencode, urlopen |
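A test that needs to hit the API can then stay version-agnostic by importing
from this module; a rough sketch along the lines of the README example (the
query values are illustrative)::

    from .compat import urlencode, urlopen

    data = urlencode({"action": "query", "format": "json", "titles": "Test"})
    raw = urlopen("http://en.wikipedia.org/w/api.php", data.encode("utf8")).read()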
@@ -0,0 +1,261 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import unittest | |||
from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, | |||
Tag, Template, Text, Wikilink) | |||
from mwparserfromhell.nodes.extras import Attribute, Parameter | |||
from mwparserfromhell.parser import tokens | |||
from mwparserfromhell.parser.builder import Builder | |||
from mwparserfromhell.smart_list import SmartList | |||
from mwparserfromhell.wikicode import Wikicode | |||
from ._test_tree_equality import TreeEqualityTestCase | |||
wrap = lambda L: Wikicode(SmartList(L)) | |||
class TestBuilder(TreeEqualityTestCase): | |||
"""Tests for the builder, which turns tokens into Wikicode objects.""" | |||
def setUp(self): | |||
self.builder = Builder() | |||
def test_text(self): | |||
"""tests for building Text nodes""" | |||
tests = [ | |||
([tokens.Text(text="foobar")], wrap([Text("foobar")])), | |||
([tokens.Text(text="fóóbar")], wrap([Text("fóóbar")])), | |||
([tokens.Text(text="spam"), tokens.Text(text="eggs")], | |||
wrap([Text("spam"), Text("eggs")])), | |||
] | |||
for test, valid in tests: | |||
self.assertWikicodeEqual(valid, self.builder.build(test)) | |||
def test_template(self): | |||
"""tests for building Template nodes""" | |||
tests = [ | |||
([tokens.TemplateOpen(), tokens.Text(text="foobar"), | |||
tokens.TemplateClose()], | |||
wrap([Template(wrap([Text("foobar")]))])), | |||
([tokens.TemplateOpen(), tokens.Text(text="spam"), | |||
tokens.Text(text="eggs"), tokens.TemplateClose()], | |||
wrap([Template(wrap([Text("spam"), Text("eggs")]))])), | |||
([tokens.TemplateOpen(), tokens.Text(text="foo"), | |||
tokens.TemplateParamSeparator(), tokens.Text(text="bar"), | |||
tokens.TemplateClose()], | |||
wrap([Template(wrap([Text("foo")]), params=[ | |||
Parameter(wrap([Text("1")]), wrap([Text("bar")]), | |||
showkey=False)])])), | |||
([tokens.TemplateOpen(), tokens.Text(text="foo"), | |||
tokens.TemplateParamSeparator(), tokens.Text(text="bar"), | |||
tokens.TemplateParamEquals(), tokens.Text(text="baz"), | |||
tokens.TemplateClose()], | |||
wrap([Template(wrap([Text("foo")]), params=[ | |||
Parameter(wrap([Text("bar")]), wrap([Text("baz")]))])])), | |||
([tokens.TemplateOpen(), tokens.Text(text="foo"), | |||
tokens.TemplateParamSeparator(), tokens.Text(text="bar"), | |||
tokens.TemplateParamEquals(), tokens.Text(text="baz"), | |||
tokens.TemplateParamSeparator(), tokens.Text(text="biz"), | |||
tokens.TemplateParamSeparator(), tokens.Text(text="buzz"), | |||
tokens.TemplateParamSeparator(), tokens.Text(text="3"), | |||
tokens.TemplateParamEquals(), tokens.Text(text="buff"), | |||
tokens.TemplateParamSeparator(), tokens.Text(text="baff"), | |||
tokens.TemplateClose()], | |||
wrap([Template(wrap([Text("foo")]), params=[ | |||
Parameter(wrap([Text("bar")]), wrap([Text("baz")])), | |||
Parameter(wrap([Text("1")]), wrap([Text("biz")]), | |||
showkey=False), | |||
Parameter(wrap([Text("2")]), wrap([Text("buzz")]), | |||
showkey=False), | |||
Parameter(wrap([Text("3")]), wrap([Text("buff")])), | |||
Parameter(wrap([Text("3")]), wrap([Text("baff")]), | |||
showkey=False)])])), | |||
] | |||
for test, valid in tests: | |||
self.assertWikicodeEqual(valid, self.builder.build(test)) | |||
def test_argument(self): | |||
"""tests for building Argument nodes""" | |||
tests = [ | |||
([tokens.ArgumentOpen(), tokens.Text(text="foobar"), | |||
tokens.ArgumentClose()], | |||
wrap([Argument(wrap([Text("foobar")]))])), | |||
([tokens.ArgumentOpen(), tokens.Text(text="spam"), | |||
tokens.Text(text="eggs"), tokens.ArgumentClose()], | |||
wrap([Argument(wrap([Text("spam"), Text("eggs")]))])), | |||
([tokens.ArgumentOpen(), tokens.Text(text="foo"), | |||
tokens.ArgumentSeparator(), tokens.Text(text="bar"), | |||
tokens.ArgumentClose()], | |||
wrap([Argument(wrap([Text("foo")]), wrap([Text("bar")]))])), | |||
([tokens.ArgumentOpen(), tokens.Text(text="foo"), | |||
tokens.Text(text="bar"), tokens.ArgumentSeparator(), | |||
tokens.Text(text="baz"), tokens.Text(text="biz"), | |||
tokens.ArgumentClose()], | |||
wrap([Argument(wrap([Text("foo"), Text("bar")]), | |||
wrap([Text("baz"), Text("biz")]))])), | |||
] | |||
for test, valid in tests: | |||
self.assertWikicodeEqual(valid, self.builder.build(test)) | |||
def test_wikilink(self): | |||
"""tests for building Wikilink nodes""" | |||
tests = [ | |||
([tokens.WikilinkOpen(), tokens.Text(text="foobar"), | |||
tokens.WikilinkClose()], | |||
wrap([Wikilink(wrap([Text("foobar")]))])), | |||
([tokens.WikilinkOpen(), tokens.Text(text="spam"), | |||
tokens.Text(text="eggs"), tokens.WikilinkClose()], | |||
wrap([Wikilink(wrap([Text("spam"), Text("eggs")]))])), | |||
([tokens.WikilinkOpen(), tokens.Text(text="foo"), | |||
tokens.WikilinkSeparator(), tokens.Text(text="bar"), | |||
tokens.WikilinkClose()], | |||
wrap([Wikilink(wrap([Text("foo")]), wrap([Text("bar")]))])), | |||
([tokens.WikilinkOpen(), tokens.Text(text="foo"), | |||
tokens.Text(text="bar"), tokens.WikilinkSeparator(), | |||
tokens.Text(text="baz"), tokens.Text(text="biz"), | |||
tokens.WikilinkClose()], | |||
wrap([Wikilink(wrap([Text("foo"), Text("bar")]), | |||
wrap([Text("baz"), Text("biz")]))])), | |||
] | |||
for test, valid in tests: | |||
self.assertWikicodeEqual(valid, self.builder.build(test)) | |||
def test_html_entity(self): | |||
"""tests for building HTMLEntity nodes""" | |||
tests = [ | |||
([tokens.HTMLEntityStart(), tokens.Text(text="nbsp"), | |||
tokens.HTMLEntityEnd()], | |||
wrap([HTMLEntity("nbsp", named=True, hexadecimal=False)])), | |||
([tokens.HTMLEntityStart(), tokens.HTMLEntityNumeric(), | |||
tokens.Text(text="107"), tokens.HTMLEntityEnd()], | |||
wrap([HTMLEntity("107", named=False, hexadecimal=False)])), | |||
([tokens.HTMLEntityStart(), tokens.HTMLEntityNumeric(), | |||
tokens.HTMLEntityHex(char="X"), tokens.Text(text="6B"), | |||
tokens.HTMLEntityEnd()], | |||
wrap([HTMLEntity("6B", named=False, hexadecimal=True, | |||
hex_char="X")])), | |||
] | |||
for test, valid in tests: | |||
self.assertWikicodeEqual(valid, self.builder.build(test)) | |||
def test_heading(self): | |||
"""tests for building Heading nodes""" | |||
tests = [ | |||
([tokens.HeadingStart(level=2), tokens.Text(text="foobar"), | |||
tokens.HeadingEnd()], | |||
wrap([Heading(wrap([Text("foobar")]), 2)])), | |||
([tokens.HeadingStart(level=4), tokens.Text(text="spam"), | |||
tokens.Text(text="eggs"), tokens.HeadingEnd()], | |||
wrap([Heading(wrap([Text("spam"), Text("eggs")]), 4)])), | |||
] | |||
for test, valid in tests: | |||
self.assertWikicodeEqual(valid, self.builder.build(test)) | |||
def test_comment(self): | |||
"""tests for building Comment nodes""" | |||
tests = [ | |||
([tokens.CommentStart(), tokens.Text(text="foobar"), | |||
tokens.CommentEnd()], | |||
wrap([Comment(wrap([Text("foobar")]))])), | |||
([tokens.CommentStart(), tokens.Text(text="spam"), | |||
tokens.Text(text="eggs"), tokens.CommentEnd()], | |||
wrap([Comment(wrap([Text("spam"), Text("eggs")]))])), | |||
] | |||
for test, valid in tests: | |||
self.assertWikicodeEqual(valid, self.builder.build(test)) | |||
@unittest.skip("holding this until feature/html_tags is ready") | |||
def test_tag(self): | |||
"""tests for building Tag nodes""" | |||
pass | |||
def test_integration(self): | |||
"""a test for building a combination of templates together""" | |||
# {{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}} | |||
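        # (reading outward: {{foo}} -> {{{{foo}}bar|baz=biz}} -> {{...buzz}} -> {{...usr|{{bin}}}})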
test = [tokens.TemplateOpen(), tokens.TemplateOpen(), | |||
tokens.TemplateOpen(), tokens.TemplateOpen(), | |||
tokens.Text(text="foo"), tokens.TemplateClose(), | |||
tokens.Text(text="bar"), tokens.TemplateParamSeparator(), | |||
tokens.Text(text="baz"), tokens.TemplateParamEquals(), | |||
tokens.Text(text="biz"), tokens.TemplateClose(), | |||
tokens.Text(text="buzz"), tokens.TemplateClose(), | |||
tokens.Text(text="usr"), tokens.TemplateParamSeparator(), | |||
tokens.TemplateOpen(), tokens.Text(text="bin"), | |||
tokens.TemplateClose(), tokens.TemplateClose()] | |||
valid = wrap( | |||
[Template(wrap([Template(wrap([Template(wrap([Template(wrap([Text( | |||
"foo")])), Text("bar")]), params=[Parameter(wrap([Text("baz")]), | |||
wrap([Text("biz")]))]), Text("buzz")])), Text("usr")]), params=[ | |||
Parameter(wrap([Text("1")]), wrap([Template(wrap([Text("bin")]))]), | |||
showkey=False)])]) | |||
self.assertWikicodeEqual(valid, self.builder.build(test)) | |||
def test_integration2(self): | |||
"""an even more audacious test for building a horrible wikicode mess""" | |||
        # {{a|b|{{c|[[d]]{{{e}}}}}}}[[f|{{{g}}}<!--h-->]]{{i|j=&nbsp;}}
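        # (a template whose parameter holds another template containing a wikilink and an
        # argument; a wikilink containing an argument and a comment; and a template whose
        # parameter value is an &nbsp; entity)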
test = [tokens.TemplateOpen(), tokens.Text(text="a"), | |||
tokens.TemplateParamSeparator(), tokens.Text(text="b"), | |||
tokens.TemplateParamSeparator(), tokens.TemplateOpen(), | |||
tokens.Text(text="c"), tokens.TemplateParamSeparator(), | |||
tokens.WikilinkOpen(), tokens.Text(text="d"), | |||
tokens.WikilinkClose(), tokens.ArgumentOpen(), | |||
tokens.Text(text="e"), tokens.ArgumentClose(), | |||
tokens.TemplateClose(), tokens.TemplateClose(), | |||
tokens.WikilinkOpen(), tokens.Text(text="f"), | |||
tokens.WikilinkSeparator(), tokens.ArgumentOpen(), | |||
tokens.Text(text="g"), tokens.ArgumentClose(), | |||
tokens.CommentStart(), tokens.Text(text="h"), | |||
tokens.CommentEnd(), tokens.WikilinkClose(), | |||
tokens.TemplateOpen(), tokens.Text(text="i"), | |||
tokens.TemplateParamSeparator(), tokens.Text(text="j"), | |||
tokens.TemplateParamEquals(), tokens.HTMLEntityStart(), | |||
tokens.Text(text="nbsp"), tokens.HTMLEntityEnd(), | |||
tokens.TemplateClose()] | |||
valid = wrap( | |||
[Template(wrap([Text("a")]), params=[Parameter(wrap([Text("1")]), | |||
wrap([Text("b")]), showkey=False), Parameter(wrap([Text("2")]), | |||
wrap([Template(wrap([Text("c")]), params=[Parameter(wrap([Text("1") | |||
]), wrap([Wikilink(wrap([Text("d")])), Argument(wrap([Text("e")]))] | |||
), showkey=False)])]), showkey=False)]), Wikilink(wrap([Text("f")] | |||
), wrap([Argument(wrap([Text("g")])), Comment(wrap([Text("h")]))]) | |||
), Template(wrap([Text("i")]), params=[Parameter(wrap([Text("j")]), | |||
wrap([HTMLEntity("nbsp", named=True)]))])]) | |||
self.assertWikicodeEqual(valid, self.builder.build(test)) | |||
if __name__ == "__main__": | |||
unittest.main(verbosity=2) |
@@ -0,0 +1,47 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import unittest | |||
try: | |||
from mwparserfromhell.parser._tokenizer import CTokenizer | |||
except ImportError: | |||
CTokenizer = None | |||
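    # CTokenizer stays None if the C extension was not built; the class below is then skipped.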
from ._test_tokenizer import TokenizerTestCase | |||
@unittest.skipUnless(CTokenizer, "C tokenizer not available") | |||
class TestCTokenizer(TokenizerTestCase, unittest.TestCase): | |||
"""Test cases for the C tokenizer.""" | |||
@classmethod | |||
def setUpClass(cls): | |||
cls.tokenizer = CTokenizer | |||
def test_uses_c(self): | |||
"""make sure the C tokenizer identifies as using a C extension""" | |||
self.assertTrue(CTokenizer.USES_C) | |||
self.assertTrue(CTokenizer().USES_C) | |||
if __name__ == "__main__": | |||
unittest.main(verbosity=2) |
@@ -0,0 +1,131 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import print_function, unicode_literals | |||
import json | |||
import unittest | |||
import mwparserfromhell | |||
from mwparserfromhell.compat import py3k, str | |||
from .compat import StringIO, urlencode, urlopen | |||
class TestDocs(unittest.TestCase): | |||
"""Integration test cases for mwparserfromhell's documentation.""" | |||
def assertPrint(self, input, output): | |||
"""Assertion check that *input*, when printed, produces *output*.""" | |||
buff = StringIO() | |||
print(input, end="", file=buff) | |||
buff.seek(0) | |||
self.assertEqual(output, buff.read()) | |||
def test_readme_1(self): | |||
"""test a block of example code in the README""" | |||
text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?" | |||
wikicode = mwparserfromhell.parse(text) | |||
self.assertPrint(wikicode, | |||
"I has a template! {{foo|bar|baz|eggs=spam}} See it?") | |||
templates = wikicode.filter_templates() | |||
if py3k: | |||
self.assertPrint(templates, "['{{foo|bar|baz|eggs=spam}}']") | |||
else: | |||
self.assertPrint(templates, "[u'{{foo|bar|baz|eggs=spam}}']") | |||
template = templates[0] | |||
self.assertPrint(template.name, "foo") | |||
if py3k: | |||
self.assertPrint(template.params, "['bar', 'baz', 'eggs=spam']") | |||
else: | |||
self.assertPrint(template.params, "[u'bar', u'baz', u'eggs=spam']") | |||
self.assertPrint(template.get(1).value, "bar") | |||
self.assertPrint(template.get("eggs").value, "spam") | |||
def test_readme_2(self): | |||
"""test a block of example code in the README""" | |||
code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}") | |||
if py3k: | |||
self.assertPrint(code.filter_templates(), | |||
"['{{foo|this {{includes a|template}}}}']") | |||
else: | |||
self.assertPrint(code.filter_templates(), | |||
"[u'{{foo|this {{includes a|template}}}}']") | |||
foo = code.filter_templates()[0] | |||
self.assertPrint(foo.get(1).value, "this {{includes a|template}}") | |||
self.assertPrint(foo.get(1).value.filter_templates()[0], | |||
"{{includes a|template}}") | |||
self.assertPrint(foo.get(1).value.filter_templates()[0].get(1).value, | |||
"template") | |||
def test_readme_3(self): | |||
"""test a block of example code in the README""" | |||
text = "{{foo|{{bar}}={{baz|{{spam}}}}}}" | |||
temps = mwparserfromhell.parse(text).filter_templates(recursive=True) | |||
if py3k: | |||
res = "['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}']" | |||
else: | |||
res = "[u'{{foo|{{bar}}={{baz|{{spam}}}}}}', u'{{bar}}', u'{{baz|{{spam}}}}', u'{{spam}}']" | |||
self.assertPrint(temps, res) | |||
def test_readme_4(self): | |||
"""test a block of example code in the README""" | |||
text = "{{cleanup}} '''Foo''' is a [[bar]]. {{uncategorized}}" | |||
code = mwparserfromhell.parse(text) | |||
for template in code.filter_templates(): | |||
if template.name == "cleanup" and not template.has_param("date"): | |||
template.add("date", "July 2012") | |||
res = "{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{uncategorized}}" | |||
self.assertPrint(code, res) | |||
code.replace("{{uncategorized}}", "{{bar-stub}}") | |||
res = "{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}}" | |||
self.assertPrint(code, res) | |||
if py3k: | |||
res = "['{{cleanup|date=July 2012}}', '{{bar-stub}}']" | |||
else: | |||
res = "[u'{{cleanup|date=July 2012}}', u'{{bar-stub}}']" | |||
self.assertPrint(code.filter_templates(), res) | |||
text = str(code) | |||
res = "{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}}" | |||
self.assertPrint(text, res) | |||
self.assertEqual(text, code) | |||
def test_readme_5(self): | |||
"""test a block of example code in the README; includes a web call""" | |||
url1 = "http://en.wikipedia.org/w/api.php" | |||
url2 = "http://en.wikipedia.org/w/index.php?title={0}&action=raw" | |||
title = "Test" | |||
data = {"action": "query", "prop": "revisions", "rvlimit": 1, | |||
"rvprop": "content", "format": "json", "titles": title} | |||
try: | |||
raw = urlopen(url1, urlencode(data).encode("utf8")).read() | |||
except IOError: | |||
self.skipTest("cannot continue because of unsuccessful web call") | |||
res = json.loads(raw.decode("utf8")) | |||
text = list(res["query"]["pages"].values())[0]["revisions"][0]["*"] | |||
try: | |||
expected = urlopen(url2.format(title)).read().decode("utf8") | |||
except IOError: | |||
self.skipTest("cannot continue because of unsuccessful web call") | |||
actual = mwparserfromhell.parse(text) | |||
self.assertEqual(expected, actual) | |||
if __name__ == "__main__": | |||
unittest.main(verbosity=2) |
@@ -1,119 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
import unittest | |||
from mwparserfromhell.parameter import Parameter | |||
from mwparserfromhell.template import Template | |||
class TestParameter(unittest.TestCase): | |||
def setUp(self): | |||
self.name = "foo" | |||
self.value1 = "bar" | |||
self.value2 = "{{spam}}" | |||
self.value3 = "bar{{spam}}" | |||
self.value4 = "embedded {{eggs|spam|baz=buz}} {{goes}} here" | |||
self.templates2 = [Template("spam")] | |||
self.templates3 = [Template("spam")] | |||
self.templates4 = [Template("eggs", [Parameter("1", "spam"), | |||
Parameter("baz", "buz")]), | |||
Template("goes")] | |||
def test_construct(self): | |||
Parameter(self.name, self.value1) | |||
Parameter(self.name, self.value2, self.templates2) | |||
Parameter(name=self.name, value=self.value3) | |||
Parameter(name=self.name, value=self.value4, templates=self.templates4) | |||
def test_name(self): | |||
params = [ | |||
Parameter(self.name, self.value1), | |||
Parameter(self.name, self.value2, self.templates2), | |||
Parameter(name=self.name, value=self.value3), | |||
Parameter(name=self.name, value=self.value4, | |||
templates=self.templates4) | |||
] | |||
for param in params: | |||
self.assertEqual(param.name, self.name) | |||
def test_value(self): | |||
tests = [ | |||
(Parameter(self.name, self.value1), self.value1), | |||
(Parameter(self.name, self.value2, self.templates2), self.value2), | |||
(Parameter(name=self.name, value=self.value3), self.value3), | |||
(Parameter(name=self.name, value=self.value4, | |||
templates=self.templates4), self.value4) | |||
] | |||
for param, correct in tests: | |||
self.assertEqual(param.value, correct) | |||
def test_templates(self): | |||
tests = [ | |||
(Parameter(self.name, self.value3, self.templates3), | |||
self.templates3), | |||
(Parameter(name=self.name, value=self.value4, | |||
templates=self.templates4), self.templates4) | |||
] | |||
for param, correct in tests: | |||
self.assertEqual(param.templates, correct) | |||
def test_magic(self): | |||
params = [Parameter(self.name, self.value1), | |||
Parameter(self.name, self.value2, self.templates2), | |||
Parameter(self.name, self.value3, self.templates3), | |||
Parameter(self.name, self.value4, self.templates4)] | |||
for param in params: | |||
self.assertEqual(repr(param), repr(param.value)) | |||
self.assertEqual(str(param), str(param.value)) | |||
self.assertIs(param < "eggs", param.value < "eggs") | |||
self.assertIs(param <= "bar{{spam}}", param.value <= "bar{{spam}}") | |||
self.assertIs(param == "bar", param.value == "bar") | |||
self.assertIs(param != "bar", param.value != "bar") | |||
self.assertIs(param > "eggs", param.value > "eggs") | |||
self.assertIs(param >= "bar{{spam}}", param.value >= "bar{{spam}}") | |||
self.assertEquals(bool(param), bool(param.value)) | |||
self.assertEquals(len(param), len(param.value)) | |||
self.assertEquals(list(param), list(param.value)) | |||
self.assertEquals(param[2], param.value[2]) | |||
self.assertEquals(list(reversed(param)), | |||
list(reversed(param.value))) | |||
self.assertIs("bar" in param, "bar" in param.value) | |||
self.assertEquals(param + "test", param.value + "test") | |||
self.assertEquals("test" + param, "test" + param.value) | |||
# add param | |||
# add template left | |||
# add template right | |||
self.assertEquals(param * 3, Parameter(param.name, param.value * 3, | |||
param.templates * 3)) | |||
self.assertEquals(3 * param, Parameter(param.name, 3 * param.value, | |||
3 * param.templates)) | |||
# add param inplace | |||
            # add template inplace
# add str inplace | |||
# multiply int inplace | |||
self.assertIsInstance(param, Parameter) | |||
self.assertIsInstance(param.value, str) | |||
if __name__ == "__main__": | |||
unittest.main(verbosity=2) |
@@ -1,6 +1,6 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
@@ -20,44 +20,50 @@ | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import unittest | |||
from mwparserfromhell.parameter import Parameter | |||
from mwparserfromhell.parser import Parser | |||
from mwparserfromhell.template import Template | |||
from mwparserfromhell import parser | |||
from mwparserfromhell.nodes import Template, Text, Wikilink | |||
from mwparserfromhell.nodes.extras import Parameter | |||
from mwparserfromhell.smart_list import SmartList | |||
from mwparserfromhell.wikicode import Wikicode | |||
TESTS = [ | |||
("", []), | |||
("abcdef ghijhk", []), | |||
("abc{this is not a template}def", []), | |||
("neither is {{this one}nor} {this one {despite}} containing braces", []), | |||
("this is an acceptable {{template}}", [Template("template")]), | |||
("{{multiple}}{{templates}}", [Template("multiple"), | |||
Template("templates")]), | |||
("multiple {{-}} templates {{+}}!", [Template("-"), Template("+")]), | |||
("{{{no templates here}}}", []), | |||
("{ {{templates here}}}", [Template("templates here")]), | |||
("{{{{I do not exist}}}}", []), | |||
("{{foo|bar|baz|eggs=spam}}", | |||
[Template("foo", [Parameter("1", "bar"), Parameter("2", "baz"), | |||
Parameter("eggs", "spam")])]), | |||
("{{abc def|ghi|jk=lmno|pqr|st=uv|wx|yz}}", | |||
[Template("abc def", [Parameter("1", "ghi"), Parameter("jk", "lmno"), | |||
Parameter("2", "pqr"), Parameter("st", "uv"), | |||
Parameter("3", "wx"), Parameter("4", "yz")])]), | |||
("{{this has a|{{template}}|inside of it}}", | |||
[Template("this has a", [Parameter("1", "{{template}}", | |||
[Template("template")]), | |||
Parameter("2", "inside of it")])]), | |||
("{{{{I exist}} }}", [Template("I exist", [] )]), | |||
("{{}}") | |||
] | |||
from ._test_tree_equality import TreeEqualityTestCase | |||
from .compat import range | |||
class TestParser(unittest.TestCase): | |||
def test_parse(self): | |||
parser = Parser() | |||
for unparsed, parsed in TESTS: | |||
self.assertEqual(parser.parse(unparsed), parsed) | |||
class TestParser(TreeEqualityTestCase): | |||
"""Tests for the Parser class itself, which tokenizes and builds nodes.""" | |||
def test_use_c(self): | |||
"""make sure the correct tokenizer is used""" | |||
if parser.use_c: | |||
self.assertTrue(parser.Parser(None)._tokenizer.USES_C) | |||
parser.use_c = False | |||
self.assertFalse(parser.Parser(None)._tokenizer.USES_C) | |||
def test_parsing(self): | |||
"""integration test for parsing overall""" | |||
text = "this is text; {{this|is=a|template={{with|[[links]]|in}}it}}" | |||
wrap = lambda L: Wikicode(SmartList(L)) | |||
expected = wrap([ | |||
Text("this is text; "), | |||
Template(wrap([Text("this")]), [ | |||
Parameter(wrap([Text("is")]), wrap([Text("a")])), | |||
Parameter(wrap([Text("template")]), wrap([ | |||
Template(wrap([Text("with")]), [ | |||
Parameter(wrap([Text("1")]), | |||
wrap([Wikilink(wrap([Text("links")]))]), | |||
showkey=False), | |||
Parameter(wrap([Text("2")]), | |||
wrap([Text("in")]), showkey=False) | |||
]), | |||
Text("it") | |||
])) | |||
]) | |||
]) | |||
actual = parser.Parser(text).parse() | |||
self.assertWikicodeEqual(expected, actual) | |||
if __name__ == "__main__": | |||
unittest.main(verbosity=2) |
@@ -0,0 +1,43 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import unittest | |||
from mwparserfromhell.parser.tokenizer import Tokenizer | |||
from ._test_tokenizer import TokenizerTestCase | |||
class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): | |||
"""Test cases for the Python tokenizer.""" | |||
@classmethod | |||
def setUpClass(cls): | |||
cls.tokenizer = Tokenizer | |||
def test_uses_c(self): | |||
"""make sure the Python tokenizer identifies as not using C""" | |||
self.assertFalse(Tokenizer.USES_C) | |||
self.assertFalse(Tokenizer().USES_C) | |||
if __name__ == "__main__": | |||
unittest.main(verbosity=2) |
@@ -0,0 +1,392 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import unittest | |||
from mwparserfromhell.compat import py3k | |||
from mwparserfromhell.smart_list import SmartList, _ListProxy | |||
from .compat import range | |||
class TestSmartList(unittest.TestCase): | |||
"""Test cases for the SmartList class and its child, _ListProxy.""" | |||
def _test_get_set_del_item(self, builder): | |||
"""Run tests on __get/set/delitem__ of a list built with *builder*.""" | |||
def assign(L, s1, s2, s3, val): | |||
L[s1:s2:s3] = val | |||
def delete(L, s1): | |||
del L[s1] | |||
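        # assign() and delete() wrap statements in callables so assertRaises can invoke them below.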
list1 = builder([0, 1, 2, 3, "one", "two"]) | |||
list2 = builder(list(range(10))) | |||
self.assertEqual(1, list1[1]) | |||
self.assertEqual("one", list1[-2]) | |||
self.assertEqual([2, 3], list1[2:4]) | |||
self.assertRaises(IndexError, lambda: list1[6]) | |||
self.assertRaises(IndexError, lambda: list1[-7]) | |||
self.assertEqual([0, 1, 2], list1[:3]) | |||
self.assertEqual([0, 1, 2, 3, "one", "two"], list1[:]) | |||
self.assertEqual([3, "one", "two"], list1[3:]) | |||
self.assertEqual(["one", "two"], list1[-2:]) | |||
self.assertEqual([0, 1], list1[:-4]) | |||
self.assertEqual([], list1[6:]) | |||
self.assertEqual([], list1[4:2]) | |||
self.assertEqual([0, 2, "one"], list1[0:5:2]) | |||
self.assertEqual([0, 2], list1[0:-3:2]) | |||
self.assertEqual([0, 1, 2, 3, "one", "two"], list1[::]) | |||
self.assertEqual([2, 3, "one", "two"], list1[2::]) | |||
self.assertEqual([0, 1, 2, 3], list1[:4:]) | |||
self.assertEqual([2, 3], list1[2:4:]) | |||
self.assertEqual([0, 2, 4, 6, 8], list2[::2]) | |||
self.assertEqual([2, 5, 8], list2[2::3]) | |||
self.assertEqual([0, 3], list2[:6:3]) | |||
self.assertEqual([2, 5, 8], list2[-8:9:3]) | |||
self.assertEqual([], list2[100000:1000:-100]) | |||
list1[3] = 100 | |||
self.assertEqual(100, list1[3]) | |||
list1[-3] = 101 | |||
self.assertEqual([0, 1, 2, 101, "one", "two"], list1) | |||
list1[5:] = [6, 7, 8] | |||
self.assertEqual([6, 7, 8], list1[5:]) | |||
self.assertEqual([0, 1, 2, 101, "one", 6, 7, 8], list1) | |||
list1[2:4] = [-1, -2, -3, -4, -5] | |||
self.assertEqual([0, 1, -1, -2, -3, -4, -5, "one", 6, 7, 8], list1) | |||
list1[0:-3] = [99] | |||
self.assertEqual([99, 6, 7, 8], list1) | |||
list2[0:6:2] = [100, 102, 104] | |||
self.assertEqual([100, 1, 102, 3, 104, 5, 6, 7, 8, 9], list2) | |||
list2[::3] = [200, 203, 206, 209] | |||
self.assertEqual([200, 1, 102, 203, 104, 5, 206, 7, 8, 209], list2) | |||
list2[::] = range(7) | |||
self.assertEqual([0, 1, 2, 3, 4, 5, 6], list2) | |||
self.assertRaises(ValueError, assign, list2, 0, 5, 2, | |||
[100, 102, 104, 106]) | |||
del list2[2] | |||
self.assertEqual([0, 1, 3, 4, 5, 6], list2) | |||
del list2[-3] | |||
self.assertEqual([0, 1, 3, 5, 6], list2) | |||
self.assertRaises(IndexError, delete, list2, 100) | |||
self.assertRaises(IndexError, delete, list2, -6) | |||
list2[:] = range(10) | |||
del list2[3:6] | |||
self.assertEqual([0, 1, 2, 6, 7, 8, 9], list2) | |||
del list2[-2:] | |||
self.assertEqual([0, 1, 2, 6, 7], list2) | |||
del list2[:2] | |||
self.assertEqual([2, 6, 7], list2) | |||
list2[:] = range(10) | |||
del list2[2:8:2] | |||
self.assertEqual([0, 1, 3, 5, 7, 8, 9], list2) | |||
def _test_add_radd_iadd(self, builder): | |||
"""Run tests on __r/i/add__ of a list built with *builder*.""" | |||
list1 = builder(range(5)) | |||
list2 = builder(range(5, 10)) | |||
self.assertEqual([0, 1, 2, 3, 4, 5, 6], list1 + [5, 6]) | |||
self.assertEqual([0, 1, 2, 3, 4], list1) | |||
self.assertEqual(list(range(10)), list1 + list2) | |||
self.assertEqual([-2, -1, 0, 1, 2, 3, 4], [-2, -1] + list1) | |||
self.assertEqual([0, 1, 2, 3, 4], list1) | |||
list1 += ["foo", "bar", "baz"] | |||
self.assertEqual([0, 1, 2, 3, 4, "foo", "bar", "baz"], list1) | |||
def _test_other_magic_methods(self, builder): | |||
"""Run tests on other magic methods of a list built with *builder*.""" | |||
list1 = builder([0, 1, 2, 3, "one", "two"]) | |||
list2 = builder([]) | |||
list3 = builder([0, 2, 3, 4]) | |||
list4 = builder([0, 1, 2]) | |||
if py3k: | |||
self.assertEqual("[0, 1, 2, 3, 'one', 'two']", str(list1)) | |||
self.assertEqual(b"\x00\x01\x02", bytes(list4)) | |||
self.assertEqual("[0, 1, 2, 3, 'one', 'two']", repr(list1)) | |||
else: | |||
self.assertEqual("[0, 1, 2, 3, u'one', u'two']", unicode(list1)) | |||
self.assertEqual(b"[0, 1, 2, 3, u'one', u'two']", str(list1)) | |||
self.assertEqual(b"[0, 1, 2, 3, u'one', u'two']", repr(list1)) | |||
self.assertTrue(list1 < list3) | |||
self.assertTrue(list1 <= list3) | |||
self.assertFalse(list1 == list3) | |||
self.assertTrue(list1 != list3) | |||
self.assertFalse(list1 > list3) | |||
self.assertFalse(list1 >= list3) | |||
other1 = [0, 2, 3, 4] | |||
self.assertTrue(list1 < other1) | |||
self.assertTrue(list1 <= other1) | |||
self.assertFalse(list1 == other1) | |||
self.assertTrue(list1 != other1) | |||
self.assertFalse(list1 > other1) | |||
self.assertFalse(list1 >= other1) | |||
other2 = [0, 0, 1, 2] | |||
self.assertFalse(list1 < other2) | |||
self.assertFalse(list1 <= other2) | |||
self.assertFalse(list1 == other2) | |||
self.assertTrue(list1 != other2) | |||
self.assertTrue(list1 > other2) | |||
self.assertTrue(list1 >= other2) | |||
other3 = [0, 1, 2, 3, "one", "two"] | |||
self.assertFalse(list1 < other3) | |||
self.assertTrue(list1 <= other3) | |||
self.assertTrue(list1 == other3) | |||
self.assertFalse(list1 != other3) | |||
self.assertFalse(list1 > other3) | |||
self.assertTrue(list1 >= other3) | |||
self.assertTrue(bool(list1)) | |||
self.assertFalse(bool(list2)) | |||
self.assertEqual(6, len(list1)) | |||
self.assertEqual(0, len(list2)) | |||
out = [] | |||
for obj in list1: | |||
out.append(obj) | |||
self.assertEqual([0, 1, 2, 3, "one", "two"], out) | |||
out = [] | |||
for ch in list2: | |||
out.append(ch) | |||
self.assertEqual([], out) | |||
gen1 = iter(list1) | |||
out = [] | |||
for i in range(len(list1)): | |||
out.append(next(gen1)) | |||
self.assertRaises(StopIteration, next, gen1) | |||
self.assertEqual([0, 1, 2, 3, "one", "two"], out) | |||
gen2 = iter(list2) | |||
self.assertRaises(StopIteration, next, gen2) | |||
self.assertEqual(["two", "one", 3, 2, 1, 0], list(reversed(list1))) | |||
self.assertEqual([], list(reversed(list2))) | |||
self.assertTrue("one" in list1) | |||
self.assertTrue(3 in list1) | |||
self.assertFalse(10 in list1) | |||
self.assertFalse(0 in list2) | |||
self.assertEqual([], list2 * 5) | |||
self.assertEqual([], 5 * list2) | |||
self.assertEqual([0, 1, 2, 0, 1, 2, 0, 1, 2], list4 * 3) | |||
self.assertEqual([0, 1, 2, 0, 1, 2, 0, 1, 2], 3 * list4) | |||
list4 *= 2 | |||
self.assertEqual([0, 1, 2, 0, 1, 2], list4) | |||
def _test_list_methods(self, builder): | |||
"""Run tests on the public methods of a list built with *builder*.""" | |||
list1 = builder(range(5)) | |||
list2 = builder(["foo"]) | |||
list3 = builder([("a", 5), ("d", 2), ("b", 8), ("c", 3)]) | |||
list1.append(5) | |||
list1.append(1) | |||
list1.append(2) | |||
self.assertEqual([0, 1, 2, 3, 4, 5, 1, 2], list1) | |||
self.assertEqual(0, list1.count(6)) | |||
self.assertEqual(2, list1.count(1)) | |||
list1.extend(range(5, 8)) | |||
self.assertEqual([0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) | |||
self.assertEqual(1, list1.index(1)) | |||
self.assertEqual(6, list1.index(1, 3)) | |||
self.assertEqual(6, list1.index(1, 3, 7)) | |||
self.assertRaises(ValueError, list1.index, 1, 3, 5) | |||
list1.insert(0, -1) | |||
self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) | |||
list1.insert(-1, 6.5) | |||
self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7], list1) | |||
list1.insert(13, 8) | |||
self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7, 8], list1) | |||
self.assertEqual(8, list1.pop()) | |||
self.assertEqual(7, list1.pop()) | |||
self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5], list1) | |||
self.assertEqual(-1, list1.pop(0)) | |||
self.assertEqual(5, list1.pop(5)) | |||
self.assertEqual(6.5, list1.pop(-1)) | |||
self.assertEqual([0, 1, 2, 3, 4, 1, 2, 5, 6], list1) | |||
self.assertEqual("foo", list2.pop()) | |||
self.assertRaises(IndexError, list2.pop) | |||
self.assertEqual([], list2) | |||
list1.remove(6) | |||
self.assertEqual([0, 1, 2, 3, 4, 1, 2, 5], list1) | |||
list1.remove(1) | |||
self.assertEqual([0, 2, 3, 4, 1, 2, 5], list1) | |||
list1.remove(1) | |||
self.assertEqual([0, 2, 3, 4, 2, 5], list1) | |||
self.assertRaises(ValueError, list1.remove, 1) | |||
list1.reverse() | |||
self.assertEqual([5, 2, 4, 3, 2, 0], list1) | |||
list1.sort() | |||
self.assertEqual([0, 2, 2, 3, 4, 5], list1) | |||
list1.sort(reverse=True) | |||
self.assertEqual([5, 4, 3, 2, 2, 0], list1) | |||
if not py3k: | |||
func = lambda x, y: abs(3 - x) - abs(3 - y) # Distance from 3 | |||
list1.sort(cmp=func) | |||
self.assertEqual([3, 4, 2, 2, 5, 0], list1) | |||
list1.sort(cmp=func, reverse=True) | |||
self.assertEqual([0, 5, 4, 2, 2, 3], list1) | |||
list3.sort(key=lambda i: i[1]) | |||
self.assertEqual([("d", 2), ("c", 3), ("a", 5), ("b", 8)], list3) | |||
list3.sort(key=lambda i: i[1], reverse=True) | |||
self.assertEqual([("b", 8), ("a", 5), ("c", 3), ("d", 2)], list3) | |||
def test_docs(self): | |||
"""make sure the methods of SmartList/_ListProxy have docstrings""" | |||
methods = ["append", "count", "extend", "index", "insert", "pop", | |||
"remove", "reverse", "sort"] | |||
for meth in methods: | |||
expected = getattr(list, meth).__doc__ | |||
smartlist_doc = getattr(SmartList, meth).__doc__ | |||
listproxy_doc = getattr(_ListProxy, meth).__doc__ | |||
self.assertEqual(expected, smartlist_doc) | |||
self.assertEqual(expected, listproxy_doc) | |||
def test_doctest(self): | |||
"""make sure the test embedded in SmartList's docstring passes""" | |||
parent = SmartList([0, 1, 2, 3]) | |||
self.assertEqual([0, 1, 2, 3], parent) | |||
child = parent[2:] | |||
self.assertEqual([2, 3], child) | |||
child.append(4) | |||
self.assertEqual([2, 3, 4], child) | |||
self.assertEqual([0, 1, 2, 3, 4], parent) | |||
def test_parent_get_set_del(self): | |||
"""make sure SmartList's getitem/setitem/delitem work""" | |||
self._test_get_set_del_item(SmartList) | |||
def test_parent_add(self): | |||
"""make sure SmartList's add/radd/iadd work""" | |||
self._test_add_radd_iadd(SmartList) | |||
def test_parent_unaffected_magics(self): | |||
"""sanity checks against SmartList features that were not modified""" | |||
self._test_other_magic_methods(SmartList) | |||
def test_parent_methods(self): | |||
"""make sure SmartList's non-magic methods work, like append()""" | |||
self._test_list_methods(SmartList) | |||
def test_child_get_set_del(self): | |||
"""make sure _ListProxy's getitem/setitem/delitem work""" | |||
self._test_get_set_del_item(lambda L: SmartList(list(L))[:]) | |||
self._test_get_set_del_item(lambda L: SmartList([999] + list(L))[1:]) | |||
self._test_get_set_del_item(lambda L: SmartList(list(L) + [999])[:-1]) | |||
builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] | |||
self._test_get_set_del_item(builder) | |||
def test_child_add(self): | |||
"""make sure _ListProxy's add/radd/iadd work""" | |||
self._test_add_radd_iadd(lambda L: SmartList(list(L))[:]) | |||
self._test_add_radd_iadd(lambda L: SmartList([999] + list(L))[1:]) | |||
self._test_add_radd_iadd(lambda L: SmartList(list(L) + [999])[:-1]) | |||
builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] | |||
self._test_add_radd_iadd(builder) | |||
def test_child_other_magics(self): | |||
"""make sure _ListProxy's other magically implemented features work""" | |||
self._test_other_magic_methods(lambda L: SmartList(list(L))[:]) | |||
self._test_other_magic_methods(lambda L: SmartList([999] + list(L))[1:]) | |||
self._test_other_magic_methods(lambda L: SmartList(list(L) + [999])[:-1]) | |||
builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] | |||
self._test_other_magic_methods(builder) | |||
def test_child_methods(self): | |||
"""make sure _ListProxy's non-magic methods work, like append()""" | |||
self._test_list_methods(lambda L: SmartList(list(L))[:]) | |||
self._test_list_methods(lambda L: SmartList([999] + list(L))[1:]) | |||
self._test_list_methods(lambda L: SmartList(list(L) + [999])[:-1]) | |||
builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] | |||
self._test_list_methods(builder) | |||
def test_influence(self): | |||
"""make sure changes are propagated from parents to children""" | |||
parent = SmartList([0, 1, 2, 3, 4, 5]) | |||
child1 = parent[2:] | |||
child2 = parent[2:5] | |||
parent.append(6) | |||
child1.append(7) | |||
child2.append(4.5) | |||
self.assertEqual([0, 1, 2, 3, 4, 4.5, 5, 6, 7], parent) | |||
self.assertEqual([2, 3, 4, 4.5, 5, 6, 7], child1) | |||
self.assertEqual([2, 3, 4, 4.5], child2) | |||
parent.insert(0, -1) | |||
parent.insert(4, 2.5) | |||
parent.insert(10, 6.5) | |||
self.assertEqual([-1, 0, 1, 2, 2.5, 3, 4, 4.5, 5, 6, 6.5, 7], parent) | |||
self.assertEqual([2, 2.5, 3, 4, 4.5, 5, 6, 6.5, 7], child1) | |||
self.assertEqual([2, 2.5, 3, 4, 4.5], child2) | |||
self.assertEqual(7, parent.pop()) | |||
self.assertEqual(6.5, child1.pop()) | |||
self.assertEqual(4.5, child2.pop()) | |||
self.assertEqual([-1, 0, 1, 2, 2.5, 3, 4, 5, 6], parent) | |||
self.assertEqual([2, 2.5, 3, 4, 5, 6], child1) | |||
self.assertEqual([2, 2.5, 3, 4], child2) | |||
parent.remove(-1) | |||
child1.remove(2.5) | |||
self.assertEqual([0, 1, 2, 3, 4, 5, 6], parent) | |||
self.assertEqual([2, 3, 4, 5, 6], child1) | |||
self.assertEqual([2, 3, 4], child2) | |||
self.assertEqual(0, parent.pop(0)) | |||
self.assertEqual([1, 2, 3, 4, 5, 6], parent) | |||
self.assertEqual([2, 3, 4, 5, 6], child1) | |||
self.assertEqual([2, 3, 4], child2) | |||
child2.reverse() | |||
self.assertEqual([1, 4, 3, 2, 5, 6], parent) | |||
self.assertEqual([4, 3, 2, 5, 6], child1) | |||
self.assertEqual([4, 3, 2], child2) | |||
parent.extend([7, 8]) | |||
child1.extend([8.1, 8.2]) | |||
child2.extend([1.9, 1.8]) | |||
self.assertEqual([1, 4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], parent) | |||
self.assertEqual([4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], child1) | |||
self.assertEqual([4, 3, 2, 1.9, 1.8], child2) | |||
if __name__ == "__main__": | |||
unittest.main(verbosity=2) |
@@ -0,0 +1,435 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
from sys import getdefaultencoding | |||
from types import GeneratorType | |||
import unittest | |||
from mwparserfromhell.compat import bytes, py3k, str | |||
from mwparserfromhell.string_mixin import StringMixIn | |||
from .compat import range | |||
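# A minimal StringMixIn subclass: it defines only __unicode__(), which the mix-in
# uses to provide the full string interface being tested here.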
class _FakeString(StringMixIn): | |||
def __init__(self, data): | |||
self._data = data | |||
def __unicode__(self): | |||
return self._data | |||
class TestStringMixIn(unittest.TestCase): | |||
"""Test cases for the StringMixIn class.""" | |||
def test_docs(self): | |||
"""make sure the various methods of StringMixIn have docstrings""" | |||
methods = [ | |||
"capitalize", "center", "count", "encode", "endswith", | |||
"expandtabs", "find", "format", "index", "isalnum", "isalpha", | |||
"isdecimal", "isdigit", "islower", "isnumeric", "isspace", | |||
"istitle", "isupper", "join", "ljust", "lower", "lstrip", | |||
"partition", "replace", "rfind", "rindex", "rjust", "rpartition", | |||
"rsplit", "rstrip", "split", "splitlines", "startswith", "strip", | |||
"swapcase", "title", "translate", "upper", "zfill"] | |||
if py3k: | |||
methods.extend(["casefold", "format_map", "isidentifier", | |||
"isprintable", "maketrans"]) | |||
else: | |||
methods.append("decode") | |||
for meth in methods: | |||
expected = getattr(str, meth).__doc__ | |||
actual = getattr(StringMixIn, meth).__doc__ | |||
self.assertEqual(expected, actual) | |||
def test_types(self): | |||
"""make sure StringMixIns convert to different types correctly""" | |||
fstr = _FakeString("fake string") | |||
self.assertEqual(str(fstr), "fake string") | |||
self.assertEqual(bytes(fstr), b"fake string") | |||
if py3k: | |||
self.assertEqual(repr(fstr), "'fake string'") | |||
else: | |||
self.assertEqual(repr(fstr), b"u'fake string'") | |||
self.assertIsInstance(str(fstr), str) | |||
self.assertIsInstance(bytes(fstr), bytes) | |||
if py3k: | |||
self.assertIsInstance(repr(fstr), str) | |||
else: | |||
self.assertIsInstance(repr(fstr), bytes) | |||
def test_comparisons(self): | |||
"""make sure comparison operators work""" | |||
str1 = _FakeString("this is a fake string") | |||
str2 = _FakeString("this is a fake string") | |||
str3 = _FakeString("fake string, this is") | |||
str4 = "this is a fake string" | |||
str5 = "fake string, this is" | |||
self.assertFalse(str1 > str2) | |||
self.assertTrue(str1 >= str2) | |||
self.assertTrue(str1 == str2) | |||
self.assertFalse(str1 != str2) | |||
self.assertFalse(str1 < str2) | |||
self.assertTrue(str1 <= str2) | |||
self.assertTrue(str1 > str3) | |||
self.assertTrue(str1 >= str3) | |||
self.assertFalse(str1 == str3) | |||
self.assertTrue(str1 != str3) | |||
self.assertFalse(str1 < str3) | |||
self.assertFalse(str1 <= str3) | |||
self.assertFalse(str1 > str4) | |||
self.assertTrue(str1 >= str4) | |||
self.assertTrue(str1 == str4) | |||
self.assertFalse(str1 != str4) | |||
self.assertFalse(str1 < str4) | |||
self.assertTrue(str1 <= str4) | |||
self.assertTrue(str1 > str5) | |||
self.assertTrue(str1 >= str5) | |||
self.assertFalse(str1 == str5) | |||
self.assertTrue(str1 != str5) | |||
self.assertFalse(str1 < str5) | |||
self.assertFalse(str1 <= str5) | |||
def test_other_magics(self): | |||
"""test other magically implemented features, like len() and iter()""" | |||
str1 = _FakeString("fake string") | |||
str2 = _FakeString("") | |||
expected = ["f", "a", "k", "e", " ", "s", "t", "r", "i", "n", "g"] | |||
self.assertTrue(str1) | |||
self.assertFalse(str2) | |||
self.assertEqual(11, len(str1)) | |||
self.assertEqual(0, len(str2)) | |||
out = [] | |||
for ch in str1: | |||
out.append(ch) | |||
self.assertEqual(expected, out) | |||
out = [] | |||
for ch in str2: | |||
out.append(ch) | |||
self.assertEqual([], out) | |||
gen1 = iter(str1) | |||
gen2 = iter(str2) | |||
self.assertIsInstance(gen1, GeneratorType) | |||
self.assertIsInstance(gen2, GeneratorType) | |||
out = [] | |||
for i in range(len(str1)): | |||
out.append(next(gen1)) | |||
self.assertRaises(StopIteration, next, gen1) | |||
self.assertEqual(expected, out) | |||
self.assertRaises(StopIteration, next, gen2) | |||
self.assertEqual("gnirts ekaf", "".join(list(reversed(str1)))) | |||
self.assertEqual([], list(reversed(str2))) | |||
self.assertEqual("f", str1[0]) | |||
self.assertEqual(" ", str1[4]) | |||
self.assertEqual("g", str1[10]) | |||
self.assertEqual("n", str1[-2]) | |||
self.assertRaises(IndexError, lambda: str1[11]) | |||
self.assertRaises(IndexError, lambda: str2[0]) | |||
self.assertTrue("k" in str1) | |||
self.assertTrue("fake" in str1) | |||
self.assertTrue("str" in str1) | |||
self.assertTrue("" in str1) | |||
self.assertTrue("" in str2) | |||
self.assertFalse("real" in str1) | |||
self.assertFalse("s" in str2) | |||
def test_other_methods(self): | |||
"""test the remaining non-magic methods of StringMixIn""" | |||
str1 = _FakeString("fake string") | |||
self.assertEqual("Fake string", str1.capitalize()) | |||
self.assertEqual(" fake string ", str1.center(15)) | |||
self.assertEqual(" fake string ", str1.center(16)) | |||
self.assertEqual("qqfake stringqq", str1.center(15, "q")) | |||
self.assertEqual(1, str1.count("e")) | |||
self.assertEqual(0, str1.count("z")) | |||
self.assertEqual(1, str1.count("r", 7)) | |||
self.assertEqual(0, str1.count("r", 8)) | |||
self.assertEqual(1, str1.count("r", 5, 9)) | |||
self.assertEqual(0, str1.count("r", 5, 7)) | |||
if not py3k: | |||
str2 = _FakeString("fo") | |||
self.assertEqual(str1, str1.decode()) | |||
actual = _FakeString("\\U00010332\\U0001033f\\U00010344") | |||
self.assertEqual("𐌲𐌿𐍄", actual.decode("unicode_escape")) | |||
self.assertRaises(UnicodeError, str2.decode, "punycode") | |||
self.assertEqual("", str2.decode("punycode", "ignore")) | |||
str3 = _FakeString("𐌲𐌿𐍄") | |||
actual = b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84" | |||
self.assertEqual(b"fake string", str1.encode()) | |||
self.assertEqual(actual, str3.encode("utf-8")) | |||
self.assertEqual(actual, str3.encode(encoding="utf-8")) | |||
if getdefaultencoding() == "ascii": | |||
self.assertRaises(UnicodeEncodeError, str3.encode) | |||
elif getdefaultencoding() == "utf-8": | |||
self.assertEqual(actual, str3.encode()) | |||
self.assertRaises(UnicodeEncodeError, str3.encode, "ascii") | |||
self.assertRaises(UnicodeEncodeError, str3.encode, "ascii", "strict") | |||
if getdefaultencoding() == "ascii": | |||
self.assertRaises(UnicodeEncodeError, str3.encode, errors="strict") | |||
elif getdefaultencoding() == "utf-8": | |||
self.assertEqual(actual, str3.encode(errors="strict")) | |||
self.assertEqual(b"", str3.encode("ascii", "ignore")) | |||
if getdefaultencoding() == "ascii": | |||
self.assertEqual(b"", str3.encode(errors="ignore")) | |||
elif getdefaultencoding() == "utf-8": | |||
self.assertEqual(actual, str3.encode(errors="ignore")) | |||
self.assertTrue(str1.endswith("ing")) | |||
self.assertFalse(str1.endswith("ingh")) | |||
str4 = _FakeString("\tfoobar") | |||
self.assertEqual("fake string", str1) | |||
self.assertEqual(" foobar", str4.expandtabs()) | |||
self.assertEqual(" foobar", str4.expandtabs(4)) | |||
self.assertEqual(3, str1.find("e")) | |||
self.assertEqual(-1, str1.find("z")) | |||
self.assertEqual(7, str1.find("r", 7)) | |||
self.assertEqual(-1, str1.find("r", 8)) | |||
self.assertEqual(7, str1.find("r", 5, 9)) | |||
self.assertEqual(-1, str1.find("r", 5, 7)) | |||
str5 = _FakeString("foo{0}baz") | |||
str6 = _FakeString("foo{abc}baz") | |||
str7 = _FakeString("foo{0}{abc}buzz") | |||
str8 = _FakeString("{0}{1}") | |||
self.assertEqual("fake string", str1.format()) | |||
self.assertEqual("foobarbaz", str5.format("bar")) | |||
self.assertEqual("foobarbaz", str6.format(abc="bar")) | |||
self.assertEqual("foobarbazbuzz", str7.format("bar", abc="baz")) | |||
self.assertRaises(IndexError, str8.format, "abc") | |||
if py3k: | |||
self.assertEqual("fake string", str1.format_map({})) | |||
self.assertEqual("foobarbaz", str6.format_map({"abc": "bar"})) | |||
self.assertRaises(ValueError, str5.format_map, {0: "abc"}) | |||
self.assertEqual(3, str1.index("e")) | |||
self.assertRaises(ValueError, str1.index, "z") | |||
self.assertEqual(7, str1.index("r", 7)) | |||
self.assertRaises(ValueError, str1.index, "r", 8) | |||
self.assertEqual(7, str1.index("r", 5, 9)) | |||
self.assertRaises(ValueError, str1.index, "r", 5, 7) | |||
str9 = _FakeString("foobar") | |||
str10 = _FakeString("foobar123") | |||
str11 = _FakeString("foo bar") | |||
self.assertTrue(str9.isalnum()) | |||
self.assertTrue(str10.isalnum()) | |||
self.assertFalse(str11.isalnum()) | |||
self.assertTrue(str9.isalpha()) | |||
self.assertFalse(str10.isalpha()) | |||
self.assertFalse(str11.isalpha()) | |||
str12 = _FakeString("123") | |||
str13 = _FakeString("\u2155") | |||
str14 = _FakeString("\u00B2") | |||
self.assertFalse(str9.isdecimal()) | |||
self.assertTrue(str12.isdecimal()) | |||
self.assertFalse(str13.isdecimal()) | |||
self.assertFalse(str14.isdecimal()) | |||
self.assertFalse(str9.isdigit()) | |||
self.assertTrue(str12.isdigit()) | |||
self.assertFalse(str13.isdigit()) | |||
self.assertTrue(str14.isdigit()) | |||
if py3k: | |||
self.assertTrue(str9.isidentifier()) | |||
self.assertTrue(str10.isidentifier()) | |||
self.assertFalse(str11.isidentifier()) | |||
self.assertFalse(str12.isidentifier()) | |||
str15 = _FakeString("") | |||
str16 = _FakeString("FooBar") | |||
self.assertTrue(str9.islower()) | |||
self.assertFalse(str15.islower()) | |||
self.assertFalse(str16.islower()) | |||
self.assertFalse(str9.isnumeric()) | |||
self.assertTrue(str12.isnumeric()) | |||
self.assertTrue(str13.isnumeric()) | |||
self.assertTrue(str14.isnumeric()) | |||
if py3k: | |||
str16B = _FakeString("\x01\x02") | |||
self.assertTrue(str9.isprintable()) | |||
self.assertTrue(str13.isprintable()) | |||
self.assertTrue(str14.isprintable()) | |||
self.assertTrue(str15.isprintable()) | |||
self.assertFalse(str16B.isprintable()) | |||
str17 = _FakeString(" ") | |||
str18 = _FakeString("\t \t \r\n") | |||
self.assertFalse(str1.isspace()) | |||
self.assertFalse(str9.isspace()) | |||
self.assertTrue(str17.isspace()) | |||
self.assertTrue(str18.isspace()) | |||
str19 = _FakeString("This Sentence Looks Like A Title") | |||
str20 = _FakeString("This sentence doesn't LookLikeATitle") | |||
self.assertFalse(str15.istitle()) | |||
self.assertTrue(str19.istitle()) | |||
self.assertFalse(str20.istitle()) | |||
str21 = _FakeString("FOOBAR") | |||
self.assertFalse(str9.isupper()) | |||
self.assertFalse(str15.isupper()) | |||
self.assertTrue(str21.isupper()) | |||
self.assertEqual("foobar", str15.join(["foo", "bar"])) | |||
self.assertEqual("foo123bar123baz", str12.join(("foo", "bar", "baz"))) | |||
self.assertEqual("fake string ", str1.ljust(15)) | |||
self.assertEqual("fake string ", str1.ljust(16)) | |||
self.assertEqual("fake stringqqqq", str1.ljust(15, "q")) | |||
str22 = _FakeString("ß") | |||
self.assertEqual("", str15.lower()) | |||
self.assertEqual("foobar", str16.lower()) | |||
self.assertEqual("ß", str22.lower()) | |||
if py3k: | |||
self.assertEqual("", str15.casefold()) | |||
self.assertEqual("foobar", str16.casefold()) | |||
self.assertEqual("ss", str22.casefold()) | |||
str23 = _FakeString(" fake string ") | |||
self.assertEqual("fake string", str1.lstrip()) | |||
self.assertEqual("fake string ", str23.lstrip()) | |||
self.assertEqual("ke string", str1.lstrip("abcdef")) | |||
self.assertEqual(("fa", "ke", " string"), str1.partition("ke")) | |||
self.assertEqual(("fake string", "", ""), str1.partition("asdf")) | |||
str24 = _FakeString("boo foo moo") | |||
self.assertEqual("real string", str1.replace("fake", "real")) | |||
self.assertEqual("bu fu moo", str24.replace("oo", "u", 2)) | |||
self.assertEqual(3, str1.rfind("e")) | |||
self.assertEqual(-1, str1.rfind("z")) | |||
self.assertEqual(7, str1.rfind("r", 7)) | |||
self.assertEqual(-1, str1.rfind("r", 8)) | |||
self.assertEqual(7, str1.rfind("r", 5, 9)) | |||
self.assertEqual(-1, str1.rfind("r", 5, 7)) | |||
self.assertEqual(3, str1.rindex("e")) | |||
self.assertRaises(ValueError, str1.rindex, "z") | |||
self.assertEqual(7, str1.rindex("r", 7)) | |||
self.assertRaises(ValueError, str1.rindex, "r", 8) | |||
self.assertEqual(7, str1.rindex("r", 5, 9)) | |||
self.assertRaises(ValueError, str1.rindex, "r", 5, 7) | |||
self.assertEqual(" fake string", str1.rjust(15)) | |||
self.assertEqual(" fake string", str1.rjust(16)) | |||
self.assertEqual("qqqqfake string", str1.rjust(15, "q")) | |||
self.assertEqual(("fa", "ke", " string"), str1.rpartition("ke")) | |||
self.assertEqual(("", "", "fake string"), str1.rpartition("asdf")) | |||
str25 = _FakeString(" this is a sentence with whitespace ") | |||
actual = ["this", "is", "a", "sentence", "with", "whitespace"] | |||
self.assertEqual(actual, str25.rsplit()) | |||
self.assertEqual(actual, str25.rsplit(None)) | |||
actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with", | |||
"", "whitespace", ""] | |||
self.assertEqual(actual, str25.rsplit(" ")) | |||
actual = [" this is a", "sentence", "with", "whitespace"] | |||
self.assertEqual(actual, str25.rsplit(None, 3)) | |||
actual = [" this is a sentence with", "", "whitespace", ""] | |||
self.assertEqual(actual, str25.rsplit(" ", 3)) | |||
if py3k: | |||
actual = [" this is a", "sentence", "with", "whitespace"] | |||
self.assertEqual(actual, str25.rsplit(maxsplit=3)) | |||
self.assertEqual("fake string", str1.rstrip()) | |||
self.assertEqual(" fake string", str23.rstrip()) | |||
self.assertEqual("fake stri", str1.rstrip("ngr")) | |||
actual = ["this", "is", "a", "sentence", "with", "whitespace"] | |||
self.assertEqual(actual, str25.split()) | |||
self.assertEqual(actual, str25.split(None)) | |||
actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with", | |||
"", "whitespace", ""] | |||
self.assertEqual(actual, str25.split(" ")) | |||
actual = ["this", "is", "a", "sentence with whitespace "] | |||
self.assertEqual(actual, str25.split(None, 3)) | |||
actual = ["", "", "", "this is a sentence with whitespace "] | |||
self.assertEqual(actual, str25.split(" ", 3)) | |||
if py3k: | |||
actual = ["this", "is", "a", "sentence with whitespace "] | |||
self.assertEqual(actual, str25.split(maxsplit=3)) | |||
str26 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere") | |||
self.assertEqual(["lines", "of", "text", "are", "presented", "here"], | |||
str26.splitlines()) | |||
self.assertEqual(["lines\n", "of\n", "text\r\n", "are\r\n", | |||
"presented\n", "here"], str26.splitlines(True)) | |||
self.assertTrue(str1.startswith("fake")) | |||
self.assertFalse(str1.startswith("faker")) | |||
self.assertEqual("fake string", str1.strip()) | |||
self.assertEqual("fake string", str23.strip()) | |||
self.assertEqual("ke stri", str1.strip("abcdefngr")) | |||
self.assertEqual("fOObAR", str16.swapcase()) | |||
self.assertEqual("Fake String", str1.title()) | |||
if py3k: | |||
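# str.maketrans() (Python 3) builds a translation table from a dict of ordinals, from two equal-length strings, or with a third string of characters to delete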
table1 = str.maketrans({97: "1", 101: "2", 105: "3", 111: "4", | |||
117: "5"}) | |||
table2 = str.maketrans("aeiou", "12345") | |||
table3 = str.maketrans("aeiou", "12345", "rts") | |||
self.assertEqual("f1k2 str3ng", str1.translate(table1)) | |||
self.assertEqual("f1k2 str3ng", str1.translate(table2)) | |||
self.assertEqual("f1k2 3ng", str1.translate(table3)) | |||
else: | |||
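# Python 2's unicode.translate() takes a mapping of character ordinals to replacement strings directly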
table = {97: "1", 101: "2", 105: "3", 111: "4", 117: "5"} | |||
self.assertEqual("f1k2 str3ng", str1.translate(table)) | |||
self.assertEqual("", str15.upper()) | |||
self.assertEqual("FOOBAR", str16.upper()) | |||
self.assertEqual("123", str12.zfill(3)) | |||
self.assertEqual("000123", str12.zfill(6)) | |||
if __name__ == "__main__": | |||
unittest.main(verbosity=2) |
@@ -1,106 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from itertools import permutations | |||
import unittest | |||
from mwparserfromhell.parameter import Parameter | |||
from mwparserfromhell.template import Template | |||
class TestTemplate(unittest.TestCase): | |||
def setUp(self): | |||
self.name = "foo" | |||
self.bar = Parameter("1", "bar") | |||
self.baz = Parameter("2", "baz") | |||
self.eggs = Parameter("eggs", "spam") | |||
self.params = [self.bar, self.baz, self.eggs] | |||
def test_construct(self): | |||
Template(self.name) | |||
Template(self.name, self.params) | |||
Template(name=self.name) | |||
Template(name=self.name, params=self.params) | |||
def test_name(self): | |||
templates = [ | |||
Template(self.name), | |||
Template(self.name, self.params), | |||
Template(name=self.name), | |||
Template(name=self.name, params=self.params) | |||
] | |||
for template in templates: | |||
self.assertEqual(template.name, self.name) | |||
def test_params(self): | |||
for template in (Template(self.name), Template(name=self.name)): | |||
self.assertEqual(template.params, []) | |||
for template in (Template(self.name, self.params), | |||
Template(name=self.name, params=self.params)): | |||
self.assertEqual(template.params, self.params) | |||
def test_getitem(self): | |||
template = Template(name=self.name, params=self.params) | |||
self.assertIs(template[0], self.bar) | |||
self.assertIs(template[1], self.baz) | |||
self.assertIs(template[2], self.eggs) | |||
self.assertIs(template["1"], self.bar) | |||
self.assertIs(template["2"], self.baz) | |||
self.assertIs(template["eggs"], self.eggs) | |||
def test_render(self): | |||
tests = [ | |||
(Template(self.name), "{{foo}}"), | |||
(Template(self.name, self.params), "{{foo|bar|baz|eggs=spam}}") | |||
] | |||
for template, rendered in tests: | |||
self.assertEqual(template.render(), rendered) | |||
def test_repr(self): | |||
correct1= 'Template(name=foo, params={})' | |||
correct2 = 'Template(name=foo, params={"1": "bar", "2": "baz", "eggs": "spam"})' | |||
tests = [(Template(self.name), correct1), | |||
(Template(self.name, self.params), correct2)] | |||
for template, correct in tests: | |||
self.assertEqual(repr(template), correct) | |||
self.assertEqual(str(template), correct) | |||
def test_cmp(self): | |||
tmp1 = Template(self.name) | |||
tmp2 = Template(name=self.name) | |||
tmp3 = Template(self.name, []) | |||
tmp4 = Template(name=self.name, params=[]) | |||
tmp5 = Template(self.name, self.params) | |||
tmp6 = Template(name=self.name, params=self.params) | |||
for tmpA, tmpB in permutations((tmp1, tmp2, tmp3, tmp4), 2): | |||
self.assertEqual(tmpA, tmpB) | |||
for tmpA, tmpB in permutations((tmp5, tmp6), 2): | |||
self.assertEqual(tmpA, tmpB) | |||
for tmpA in (tmp5, tmp6): | |||
for tmpB in (tmp1, tmp2, tmp3, tmp4): | |||
self.assertNotEqual(tmpA, tmpB) | |||
self.assertNotEqual(tmpB, tmpA) | |||
if __name__ == "__main__": | |||
unittest.main(verbosity=2) |
@@ -0,0 +1,108 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import unittest | |||
from mwparserfromhell.compat import py3k | |||
from mwparserfromhell.parser import tokens | |||
class TestTokens(unittest.TestCase): | |||
"""Test cases for the Token class and its subclasses.""" | |||
def test_issubclass(self): | |||
"""check that all classes within the tokens module are really Tokens""" | |||
for name in tokens.__all__: | |||
klass = getattr(tokens, name) | |||
self.assertTrue(issubclass(klass, tokens.Token)) | |||
self.assertIsInstance(klass(), klass) | |||
self.assertIsInstance(klass(), tokens.Token) | |||
def test_attributes(self): | |||
"""check that Token attributes can be managed properly""" | |||
token1 = tokens.Token() | |||
token2 = tokens.Token(foo="bar", baz=123) | |||
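# tokens accept arbitrary keyword arguments as attributes; missing ones raise KeyError rather than AttributeError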
self.assertEqual("bar", token2.foo) | |||
self.assertEqual(123, token2.baz) | |||
self.assertRaises(KeyError, lambda: token1.foo) | |||
self.assertRaises(KeyError, lambda: token2.bar) | |||
token1.spam = "eggs" | |||
token2.foo = "ham" | |||
del token2.baz | |||
self.assertEqual("eggs", token1.spam) | |||
self.assertEqual("ham", token2.foo) | |||
self.assertRaises(KeyError, lambda: token2.baz) | |||
self.assertRaises(KeyError, delattr, token2, "baz") | |||
def test_repr(self): | |||
"""check that repr() on a Token works as expected""" | |||
token1 = tokens.Token() | |||
token2 = tokens.Token(foo="bar", baz=123) | |||
token3 = tokens.Text(text="earwig" * 100) | |||
hundredchars = ("earwig" * 100)[:97] + "..." | |||
self.assertEqual("Token()", repr(token1)) | |||
if py3k: | |||
token2repr1 = "Token(foo='bar', baz=123)" | |||
token2repr2 = "Token(baz=123, foo='bar')" | |||
token3repr = "Text(text='" + hundredchars + "')" | |||
else: | |||
token2repr1 = "Token(foo=u'bar', baz=123)" | |||
token2repr2 = "Token(baz=123, foo=u'bar')" | |||
token3repr = "Text(text=u'" + hundredchars + "')" | |||
token2repr = repr(token2) | |||
self.assertTrue(token2repr == token2repr1 or token2repr == token2repr2) | |||
self.assertEqual(token3repr, repr(token3)) | |||
def test_equality(self): | |||
"""check that equivalent tokens are considered equal""" | |||
token1 = tokens.Token() | |||
token2 = tokens.Token() | |||
token3 = tokens.Token(foo="bar", baz=123) | |||
token4 = tokens.Text(text="asdf") | |||
token5 = tokens.Text(text="asdf") | |||
token6 = tokens.TemplateOpen(text="asdf") | |||
self.assertEqual(token1, token2) | |||
self.assertEqual(token2, token1) | |||
self.assertEqual(token4, token5) | |||
self.assertEqual(token5, token4) | |||
self.assertNotEqual(token1, token3) | |||
self.assertNotEqual(token2, token3) | |||
self.assertNotEqual(token4, token6) | |||
self.assertNotEqual(token5, token6) | |||
def test_repr_equality(self): | |||
"check that eval(repr(token)) == token" | |||
tests = [ | |||
tokens.Token(), | |||
tokens.Token(foo="bar", baz=123), | |||
tokens.Text(text="earwig") | |||
] | |||
for token in tests: | |||
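# vars(tokens) supplies the token classes as eval()'s namespace, so each repr can be rebuilt into an equal token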
self.assertEqual(token, eval(repr(token), vars(tokens))) | |||
if __name__ == "__main__": | |||
unittest.main(verbosity=2) |
@@ -0,0 +1,67 @@ | |||
# -*- coding: utf-8 -*- | |||
# | |||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||
# | |||
# Permission is hereby granted, free of charge, to any person obtaining a copy | |||
# of this software and associated documentation files (the "Software"), to deal | |||
# in the Software without restriction, including without limitation the rights | |||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
# copies of the Software, and to permit persons to whom the Software is | |||
# furnished to do so, subject to the following conditions: | |||
# | |||
# The above copyright notice and this permission notice shall be included in | |||
# all copies or substantial portions of the Software. | |||
# | |||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
# SOFTWARE. | |||
from __future__ import unicode_literals | |||
import unittest | |||
from mwparserfromhell.nodes import Template, Text | |||
from mwparserfromhell.smart_list import SmartList | |||
from mwparserfromhell.utils import parse_anything | |||
from mwparserfromhell.wikicode import Wikicode | |||
from ._test_tree_equality import TreeEqualityTestCase | |||
class TestUtils(TreeEqualityTestCase): | |||
"""Tests for the utils module, which provides parse_anything().""" | |||
def test_parse_anything_valid(self): | |||
"""tests for valid input to utils.parse_anything()""" | |||
wrap = lambda L: Wikicode(SmartList(L)) | |||
textify = lambda L: wrap([Text(item) for item in L]) | |||
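# wrap() builds a Wikicode around a list of nodes; textify() first converts plain strings into Text nodes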
tests = [ | |||
(wrap([Text("foobar")]), textify(["foobar"])), | |||
(Template(wrap([Text("spam")])), | |||
wrap([Template(textify(["spam"]))])), | |||
("fóóbar", textify(["fóóbar"])), | |||
(b"foob\xc3\xa1r", textify(["foobár"])), | |||
(123, textify(["123"])), | |||
(True, textify(["True"])), | |||
(None, wrap([])), | |||
([Text("foo"), Text("bar"), Text("baz")], | |||
textify(["foo", "bar", "baz"])), | |||
([wrap([Text("foo")]), Text("bar"), "baz", 123, 456], | |||
textify(["foo", "bar", "baz", "123", "456"])), | |||
([[[([[((("foo",),),)], "bar"],)]]], textify(["foo", "bar"])) | |||
] | |||
for test, valid in tests: | |||
self.assertWikicodeEqual(valid, parse_anything(test)) | |||
def test_parse_anything_invalid(self): | |||
"""tests for invalid input to utils.parse_anything()""" | |||
self.assertRaises(ValueError, parse_anything, Ellipsis) | |||
self.assertRaises(ValueError, parse_anything, object) | |||
self.assertRaises(ValueError, parse_anything, object()) | |||
self.assertRaises(ValueError, parse_anything, type) | |||
self.assertRaises(ValueError, parse_anything, ["foo", [object]]) | |||
if __name__ == "__main__": | |||
unittest.main(verbosity=2) |
@@ -0,0 +1,599 @@ | |||
name: no_params | |||
label: simplest type of template | |||
input: "{{template}}" | |||
output: [TemplateOpen(), Text(text="template"), TemplateClose()] | |||
--- | |||
name: one_param_unnamed | |||
label: basic template with one unnamed parameter | |||
input: "{{foo|bar}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateClose()] | |||
--- | |||
name: one_param_named | |||
label: basic template with one named parameter | |||
input: "{{foo|bar=baz}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] | |||
--- | |||
name: multiple_unnamed_params | |||
label: basic template with multiple unnamed parameters | |||
input: "{{foo|bar|baz|biz|buzz}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamSeparator(), Text(text="buzz"), TemplateClose()] | |||
--- | |||
name: multiple_named_params | |||
label: basic template with multiple named parameters | |||
input: "{{foo|bar=baz|biz=buzz|buff=baff|usr=bin}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamEquals(), Text(text="buzz"), TemplateParamSeparator(), Text(text="buff"), TemplateParamEquals(), Text(text="baff"), TemplateParamSeparator(), Text(text="usr"), TemplateParamEquals(), Text(text="bin"), TemplateClose()] | |||
--- | |||
name: multiple_mixed_params | |||
label: basic template with multiple unnamed/named parameters | |||
input: "{{foo|bar=baz|biz|buzz=buff|usr|bin}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamSeparator(), Text(text="buzz"), TemplateParamEquals(), Text(text="buff"), TemplateParamSeparator(), Text(text="usr"), TemplateParamSeparator(), Text(text="bin"), TemplateClose()] | |||
--- | |||
name: multiple_mixed_params2 | |||
label: basic template with multiple unnamed/named parameters in another order | |||
input: "{{foo|bar|baz|biz=buzz|buff=baff|usr=bin}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamEquals(), Text(text="buzz"), TemplateParamSeparator(), Text(text="buff"), TemplateParamEquals(), Text(text="baff"), TemplateParamSeparator(), Text(text="usr"), TemplateParamEquals(), Text(text="bin"), TemplateClose()] | |||
--- | |||
name: nested_unnamed_param | |||
label: nested template as an unnamed parameter | |||
input: "{{foo|{{bar}}}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_named_param_value | |||
label: nested template as a parameter value with a named parameter | |||
input: "{{foo|bar={{baz}}}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_named_param_name_and_value | |||
label: nested templates as a parameter name and value | |||
input: "{{foo|{{bar}}={{baz}}}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_name_start | |||
label: nested template at the beginning of a template name | |||
input: "{{{{foo}}bar}}" | |||
output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose()] | |||
--- | |||
name: nested_name_start_unnamed_param | |||
label: nested template at the beginning of a template name and as an unnamed parameter | |||
input: "{{{{foo}}bar|{{baz}}}}" | |||
output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_name_start_named_param_value | |||
label: nested template at the beginning of a template name and as a parameter value with a named parameter | |||
input: "{{{{foo}}bar|baz={{biz}}}}" | |||
output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_name_start_named_param_name_and_value | |||
label: nested template at the beginning of a template name and as a parameter name and value | |||
input: "{{{{foo}}bar|{{baz}}={{biz}}}}" | |||
output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_name_end | |||
label: nested template at the end of a template name | |||
input: "{{foo{{bar}}}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_name_end_unnamed_param | |||
label: nested template at the end of a template name and as an unnamed parameter | |||
input: "{{foo{{bar}}|{{baz}}}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_name_end_named_param_value | |||
label: nested template at the end of a template name and as a parameter value with a named parameter | |||
input: "{{foo{{bar}}|baz={{biz}}}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_name_end_named_param_name_and_value | |||
label: nested template at the end of a template name and as a parameter name and value | |||
input: "{{foo{{bar}}|{{baz}}={{biz}}}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_name_mid | |||
label: nested template in the middle of a template name | |||
input: "{{foo{{bar}}baz}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose()] | |||
--- | |||
name: nested_name_mid_unnamed_param | |||
label: nested template in the middle of a template name and as an unnamed parameter | |||
input: "{{foo{{bar}}baz|{{biz}}}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateParamSeparator(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_name_mid_named_param_value | |||
label: nested template in the middle of a template name and as a parameter value with a named parameter | |||
input: "{{foo{{bar}}baz|biz={{buzz}}}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamEquals(), TemplateOpen(), Text(text="buzz"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_name_mid_named_param_name_and_value | |||
label: nested template in the middle of a template name and as a parameter name and value | |||
input: "{{foo{{bar}}baz|{{biz}}={{buzz}}}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateParamSeparator(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="buzz"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_name_start_end | |||
label: nested template at the beginning and end of a template name | |||
input: "{{{{foo}}{{bar}}}}" | |||
output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_name_start_end_unnamed_param | |||
label: nested template at the beginning and end of a template name and as an unnamed parameter | |||
input: "{{{{foo}}{{bar}}|{{baz}}}}" | |||
output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_name_start_end_named_param_value | |||
label: nested template at the beginning and end of a template name and as a parameter value with a named parameter | |||
input: "{{{{foo}}{{bar}}|baz={{biz}}}}" | |||
output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_name_start_end_named_param_name_and_value | |||
label: nested template at the beginning and end of a template name and as a parameter name and value | |||
input: "{{{{foo}}{{bar}}|{{baz}}={{biz}}}}" | |||
output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_names_multiple | |||
label: multiple nested templates within nested templates | |||
input: "{{{{{{{{foo}}bar}}baz}}biz}}" | |||
output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose(), Text(text="biz"), TemplateClose()] | |||
--- | |||
name: nested_names_multiple_unnamed_param | |||
label: multiple nested templates within nested templates with a nested unnamed parameter | |||
input: "{{{{{{{{foo}}bar}}baz}}biz|{{buzz}}}}" | |||
output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose(), Text(text="biz"), TemplateParamSeparator(), TemplateOpen(), Text(text="buzz"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_names_multiple_named_param_value | |||
label: multiple nested templates within nested templates with a nested parameter value in a named parameter | |||
input: "{{{{{{{{foo}}bar}}baz}}biz|buzz={{bin}}}}" | |||
output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose(), Text(text="biz"), TemplateParamSeparator(), Text(text="buzz"), TemplateParamEquals(), TemplateOpen(), Text(text="bin"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: nested_names_multiple_named_param_name_and_value | |||
label: multiple nested templates within nested templates with a nested parameter name and value | |||
input: "{{{{{{{{foo}}bar}}baz}}biz|{{buzz}}={{bin}}}}" | |||
output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose(), Text(text="biz"), TemplateParamSeparator(), TemplateOpen(), Text(text="buzz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="bin"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: mixed_nested_templates | |||
label: mixed assortment of nested templates within template names, parameter names, and values | |||
input: "{{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}}" | |||
output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), Text(text="biz"), TemplateClose(), Text(text="buzz"), TemplateClose(), Text(text="usr"), TemplateParamSeparator(), TemplateOpen(), Text(text="bin"), TemplateClose(), TemplateClose()] | |||
--- | |||
name: newlines_start | |||
label: a newline at the start of a template name | |||
input: "{{\nfoobar}}" | |||
output: [TemplateOpen(), Text(text="\nfoobar"), TemplateClose()] | |||
--- | |||
name: newlines_end | |||
label: a newline at the end of a template name | |||
input: "{{foobar\n}}" | |||
output: [TemplateOpen(), Text(text="foobar\n"), TemplateClose()] | |||
--- | |||
name: newlines_start_end | |||
label: a newline at the start and end of a template name | |||
input: "{{\nfoobar\n}}" | |||
output: [TemplateOpen(), Text(text="\nfoobar\n"), TemplateClose()] | |||
--- | |||
name: newlines_mid | |||
label: a newline at the middle of a template name | |||
input: "{{foo\nbar}}" | |||
output: [Text(text="{{foo\nbar}}")] | |||
--- | |||
name: newlines_start_mid | |||
label: a newline at the start and middle of a template name | |||
input: "{{\nfoo\nbar}}" | |||
output: [Text(text="{{\nfoo\nbar}}")] | |||
--- | |||
name: newlines_mid_end | |||
label: a newline at the middle and end of a template name | |||
input: "{{foo\nbar\n}}" | |||
output: [Text(text="{{foo\nbar\n}}")] | |||
--- | |||
name: newlines_start_mid_end | |||
label: a newline at the start, middle, and end of a template name | |||
input: "{{\nfoo\nbar\n}}" | |||
output: [Text(text="{{\nfoo\nbar\n}}")] | |||
--- | |||
name: newlines_unnamed_param | |||
label: newlines within an unnamed template parameter | |||
input: "{{foo|\nb\nar\n}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateClose()] | |||
--- | |||
name: newlines_enclose_template_name_unnamed_param | |||
label: newlines enclosing a template name and within an unnamed template parameter | |||
input: "{{\nfoo\n|\nb\nar\n}}" | |||
output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateClose()] | |||
--- | |||
name: newlines_within_template_name_unnamed_param | |||
label: newlines within a template name and within an unnamed template parameter | |||
input: "{{\nfo\no\n|\nb\nar\n}}" | |||
output: [Text(text="{{\nfo\no\n|\nb\nar\n}}")] | |||
--- | |||
name: newlines_enclose_template_name_named_param_value | |||
label: newlines enclosing a template name and within a named parameter value | |||
input: "{{\nfoo\n|1=\nb\nar\n}}" | |||
output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="\nb\nar\n"), TemplateClose()] | |||
--- | |||
name: newlines_within_template_name_named_param_value | |||
label: newlines within a template name and within a named parameter value | |||
input: "{{\nf\noo\n|1=\nb\nar\n}}" | |||
output: [Text(text="{{\nf\noo\n|1=\nb\nar\n}}")] | |||
--- | |||
name: newlines_named_param_name | |||
label: newlines within a parameter name | |||
input: "{{foo|\nb\nar\n=baz}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] | |||
--- | |||
name: newlines_named_param_name_param_value | |||
label: newlines within a parameter name and within a parameter value | |||
input: "{{foo|\nb\nar\n=\nba\nz\n}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="\nba\nz\n"), TemplateClose()] | |||
--- | |||
name: newlines_enclose_template_name_named_param_name | |||
label: newlines enclosing a template name and within a parameter name | |||
input: "{{\nfoo\n|\nb\nar\n=baz}}" | |||
output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] | |||
--- | |||
name: newlines_enclose_template_name_named_param_name_param_value | |||
label: newlines enclosing a template name and within a parameter name and within a parameter value | |||
input: "{{\nfoo\n|\nb\nar\n=\nba\nz\n}}" | |||
output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="\nba\nz\n"), TemplateClose()] | |||
--- | |||
name: newlines_within_template_name_named_param_name | |||
label: newlines within a template name and within a parameter name | |||
input: "{{\nfo\no\n|\nb\nar\n=baz}}" | |||
output: [Text(text="{{\nfo\no\n|\nb\nar\n=baz}}")] | |||
--- | |||
name: newlines_within_template_name_named_param_name_param_value | |||
label: newlines within a template name and within a parameter name and within a parameter value | |||
input: "{{\nf\noo\n|\nb\nar\n=\nba\nz\n}}" | |||
output: [Text(text="{{\nf\noo\n|\nb\nar\n=\nba\nz\n}}")] | |||
--- | |||
name: newlines_wildcard | |||
label: a random, complex assortment of templates and newlines | |||
input: "{{\nfoo\n|\nb\nar\n=\nb\naz\n|\nb\nuz\n}}" | |||
output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="\nb\naz\n"), TemplateParamSeparator(), Text(text="\nb\nuz\n"), TemplateClose()] | |||
--- | |||
name: newlines_wildcard_redux | |||
label: an even more random and complex assortment of templates and newlines | |||
input: "{{\nfoo\n|\n{{\nbar\n|\nb\naz\n=\nb\niz\n}}\n=\nb\nuzz\n}}" | |||
output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\n"), TemplateOpen(), Text(text="\nbar\n"), TemplateParamSeparator(), Text(text="\nb\naz\n"), TemplateParamEquals(), Text(text="\nb\niz\n"), TemplateClose(), Text(text="\n"), TemplateParamEquals(), Text(text="\nb\nuzz\n"), TemplateClose()] | |||
--- | |||
name: newlines_wildcard_redux_invalid | |||
label: a variation of the newlines_wildcard_redux test that is invalid | |||
input: "{{\nfoo\n|\n{{\nb\nar\n|\nb\naz\n=\nb\niz\n}}\n=\nb\nuzz\n}}" | |||
output: [Text(text="{{\nfoo\n|\n{{\nb\nar\n|\nb\naz\n=\nb\niz\n}}\n=\nb\nuzz\n}}")] | |||
--- | |||
name: invalid_name_left_brace_middle | |||
label: invalid characters in template name: left brace in middle | |||
input: "{{foo{bar}}" | |||
output: [Text(text="{{foo{bar}}")] | |||
--- | |||
name: invalid_name_right_brace_middle | |||
label: invalid characters in template name: right brace in middle | |||
input: "{{foo}bar}}" | |||
output: [Text(text="{{foo}bar}}")] | |||
--- | |||
name: invalid_name_left_braces | |||
label: invalid characters in template name: two left braces in middle | |||
input: "{{foo{b{ar}}" | |||
output: [Text(text="{{foo{b{ar}}")] | |||
--- | |||
name: invalid_name_left_bracket_middle | |||
label: invalid characters in template name: left bracket in middle | |||
input: "{{foo[bar}}" | |||
output: [Text(text="{{foo[bar}}")] | |||
--- | |||
name: invalid_name_right_bracket_middle | |||
label: invalid characters in template name: right bracket in middle | |||
input: "{{foo]bar}}" | |||
output: [Text(text="{{foo]bar}}")] | |||
--- | |||
name: invalid_name_left_bracket_start | |||
label: invalid characters in template name: left bracket at start | |||
input: "{{[foobar}}" | |||
output: [Text(text="{{[foobar}}")] | |||
--- | |||
name: invalid_name_right_bracket_end
label: invalid characters in template name: right bracket at end | |||
input: "{{foobar]}}" | |||
output: [Text(text="{{foobar]}}")] | |||
--- | |||
name: valid_name_left_brace_start | |||
label: valid characters in template name: left brace at start | |||
input: "{{{foobar}}" | |||
output: [Text(text="{"), TemplateOpen(), Text(text="foobar"), TemplateClose()] | |||
--- | |||
name: valid_unnamed_param_left_brace | |||
label: valid characters in unnamed template parameter: left brace | |||
input: "{{foo|ba{r}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba{r"), TemplateClose()] | |||
--- | |||
name: valid_unnamed_param_braces | |||
label: valid characters in unnamed template parameter: left and right braces | |||
input: "{{foo|ba{r}}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba{r"), TemplateClose(), Text(text="}")] | |||
--- | |||
name: valid_param_name_braces | |||
label: valid characters in template parameter name: left and right braces | |||
input: "{{foo|ba{r}=baz}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba{r}"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] | |||
--- | |||
name: valid_param_name_brackets | |||
label: valid characters in template parameter name: left and right brackets
input: "{{foo|ba[r]=baz}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba[r]"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] | |||
--- | |||
name: valid_param_name_double_left_brackets | |||
label: valid characters in template parameter name: double left brackets
input: "{{foo|bar[[in\nvalid=baz}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar[[in\nvalid"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] | |||
--- | |||
name: valid_param_name_double_right_brackets | |||
label: valid characters in template parameter name: double right brackets
input: "{{foo|bar]]=baz}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar]]"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] | |||
--- | |||
name: valid_param_name_double_brackets | |||
label: valid characters in template parameter name: double left and right brackets
input: "{{foo|bar[[in\nvalid]]=baz}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar[[in\nvalid]]"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] | |||
--- | |||
name: invalid_param_name_double_left_braces | |||
label: invalid characters in template parameter name: double left braces | |||
input: "{{foo|bar{{in\nvalid=baz}}" | |||
output: [Text(text="{{foo|bar{{in\nvalid=baz}}")] | |||
--- | |||
name: invalid_param_name_double_braces | |||
label: invalid characters in template parameter name: double left and right braces | |||
input: "{{foo|bar{{in\nvalid}}=baz}}" | |||
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar{{in\nvalid"), TemplateClose(), Text(text="=baz}}")] | |||
--- | |||
name: incomplete_plain | |||
label: incomplete templates that should fail gracefully: no close whatsoever | |||
input: "{{stuff}} {{foobar" | |||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foobar")] | |||
--- | |||
name: incomplete_right_brace | |||
label: incomplete templates that should fail gracefully: only one right brace | |||
input: "{{stuff}} {{foobar}" | |||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foobar}")] | |||
--- | |||
name: incomplete_pipe | |||
label: incomplete templates that should fail gracefully: a pipe | |||
input: "{{stuff}} {{foobar|" | |||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foobar|")] | |||
--- | |||
name: incomplete_unnamed_param | |||
label: incomplete templates that should fail gracefully: an unnamed parameter | |||
input: "{{stuff}} {{foo|bar" | |||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar")] | |||
--- | |||
name: incomplete_unnamed_param_pipe | |||
label: incomplete templates that should fail gracefully: an unnamed parameter, then a pipe | |||
input: "{{stuff}} {{foo|bar|" | |||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar|")] | |||
--- | |||
name: incomplete_valueless_param | |||
label: incomplete templates that should fail gracefully: a named parameter with no value
input: "{{stuff}} {{foo|bar=" | |||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=")] | |||
--- | |||
name: incomplete_valueless_param_pipe | |||
label: incomplete templates that should fail gracefully: a named parameter with no value, then a pipe | |||
input: "{{stuff}} {{foo|bar=|" | |||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=|")] | |||
--- | |||
name: incomplete_named_param | |||
label: incomplete templates that should fail gracefully: a named parameter with a value | |||
input: "{{stuff}} {{foo|bar=baz" | |||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz")] | |||
--- | |||
name: incomplete_named_param_pipe | |||
label: incomplete templates that should fail gracefully: a named parameter with a value, then a pipe
input: "{{stuff}} {{foo|bar=baz|" | |||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz|")] | |||
--- | |||
name: incomplete_two_unnamed_params | |||
label: incomplete templates that should fail gracefully: two unnamed parameters | |||
input: "{{stuff}} {{foo|bar|baz" | |||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar|baz")] | |||
--- | |||
name: incomplete_unnamed_param_valueless_param | |||
label: incomplete templates that should fail gracefully: an unnamed parameter, then a named parameter with no value | |||
input: "{{stuff}} {{foo|bar|baz=" | |||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar|baz=")] | |||
--- | |||
name: incomplete_unnamed_param_named_param | |||
label: incomplete templates that should fail gracefully: an unnamed parameter, then a named parameter with a value | |||
input: "{{stuff}} {{foo|bar|baz=biz" | |||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar|baz=biz")] | |||
--- | |||
name: incomplete_named_param_unnamed_param | |||
label: incomplete templates that should fail gracefully: a named parameter with a value, then an unnamed parameter | |||
input: "{{stuff}} {{foo|bar=baz|biz" | |||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz|biz")] | |||
--- | |||
name: incomplete_named_param_valueless_param | |||
label: incomplete templates that should fail gracefully: a named parameter with a value, then a named parameter with no value | |||
input: "{{stuff}} {{foo|bar=baz|biz=" | |||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz|biz=")] | |||
--- | |||
name: incomplete_two_named_params | |||
label: incomplete templates that should fail gracefully: two named parameters with values | |||
input: "{{stuff}} {{foo|bar=baz|biz=buzz" | |||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz|biz=buzz")] | |||
--- | |||
name: incomplete_nested_template_as_unnamed_param | |||
label: incomplete templates that should fail gracefully: a valid nested template as an unnamed parameter | |||
input: "{{stuff}} {{foo|{{bar}}" | |||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|"), TemplateOpen(), Text(text="bar"), TemplateClose()] | |||
--- | |||
name: incomplete_nested_template_as_param_value | |||
label: incomplete templates that should fail gracefully: a valid nested template as a parameter value | |||
input: "{{stuff}} {{foo|bar={{baz}}" | |||
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar="), TemplateOpen(), Text(text="baz"), TemplateClose()] |
@@ -0,0 +1,25 @@ | |||
name: basic | |||
label: sanity check for basic text parsing, no gimmicks | |||
input: "foobar" | |||
output: [Text(text="foobar")] | |||
--- | |||
name: newlines | |||
label: slightly more complex text parsing, with newlines | |||
input: "This is a line of text.\nThis is another line of text.\nThis is another." | |||
output: [Text(text="This is a line of text.\nThis is another line of text.\nThis is another.")] | |||
--- | |||
name: unicode | |||
label: ensure unicode data is handled properly | |||
input: "Thís ís å sëñtënce with diœcritiçs." | |||
output: [Text(text="Thís ís å sëñtënce with diœcritiçs.")] | |||
--- | |||
name: unicode2 | |||
label: additional unicode check for non-BMP codepoints | |||
input: "𐌲𐌿𐍄𐌰𐍂𐌰𐌶𐌳𐌰" | |||
output: [Text(text="𐌲𐌿𐍄𐌰𐍂𐌰𐌶𐌳𐌰")] |