diff --git a/.gitignore b/.gitignore index d70b37d..ec4e8ca 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ *.pyc +*.so *.egg *.egg-info .DS_Store diff --git a/README.rst b/README.rst index 77f12c7..90e896f 100644 --- a/README.rst +++ b/README.rst @@ -18,7 +18,8 @@ so you can install the latest release with ``pip install mwparserfromhell`` cd mwparserfromhell python setup.py install -You can run the comprehensive unit testing suite with ``python setup.py test``. +You can run the comprehensive unit testing suite with +``python setup.py test -q``. Usage ----- @@ -124,7 +125,9 @@ following code (via the API_):: import mwparserfromhell API_URL = "http://en.wikipedia.org/w/api.php" def parse(title): - raw = urllib.urlopen(API_URL, data).read() + data = {"action": "query", "prop": "revisions", "rvlimit": 1, + "rvprop": "content", "format": "json", "titles": title} + raw = urllib.urlopen(API_URL, urllib.urlencode(data)).read() res = json.loads(raw) text = res["query"]["pages"].values()[0]["revisions"][0]["*"] return mwparserfromhell.parse(text) diff --git a/mwparserfromhell/compat.py b/mwparserfromhell/compat.py index a1b6b8f..bb81513 100755 --- a/mwparserfromhell/compat.py +++ b/mwparserfromhell/compat.py @@ -1,29 +1,29 @@ -# -*- coding: utf-8 -*- - -""" -Implements support for both Python 2 and Python 3 by defining common types in -terms of their Python 2/3 variants. For example, :py:class:`str` is set to -:py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise, -:py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These -types are meant to be imported directly from within the parser's modules. -""" - -import sys - -py3k = sys.version_info[0] == 3 - -if py3k: - bytes = bytes - str = str - basestring = str - maxsize = sys.maxsize - import html.entities as htmlentities - -else: - bytes = str - str = unicode - basestring = basestring - maxsize = sys.maxint - import htmlentitydefs as htmlentities - -del sys +# -*- coding: utf-8 -*- + +""" +Implements support for both Python 2 and Python 3 by defining common types in +terms of their Python 2/3 variants. For example, :py:class:`str` is set to +:py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise, +:py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These +types are meant to be imported directly from within the parser's modules. 
+""" + +import sys + +py3k = sys.version_info[0] == 3 + +if py3k: + bytes = bytes + str = str + basestring = str + maxsize = sys.maxsize + import html.entities as htmlentities + +else: + bytes = str + str = unicode + basestring = basestring + maxsize = sys.maxint + import htmlentitydefs as htmlentities + +del sys diff --git a/mwparserfromhell/nodes/argument.py b/mwparserfromhell/nodes/argument.py index 06facb4..d7db92a 100644 --- a/mwparserfromhell/nodes/argument.py +++ b/mwparserfromhell/nodes/argument.py @@ -30,6 +30,7 @@ __all__ = ["Argument"] class Argument(Node): """Represents a template argument substitution, like ``{{{foo}}}``.""" + def __init__(self, name, default=None): super(Argument, self).__init__() self._name = name diff --git a/mwparserfromhell/nodes/comment.py b/mwparserfromhell/nodes/comment.py index b34c29e..e96ce38 100644 --- a/mwparserfromhell/nodes/comment.py +++ b/mwparserfromhell/nodes/comment.py @@ -29,6 +29,7 @@ __all__ = ["Comment"] class Comment(Node): """Represents a hidden HTML comment, like ````.""" + def __init__(self, contents): super(Comment, self).__init__() self._contents = contents diff --git a/mwparserfromhell/nodes/html_entity.py b/mwparserfromhell/nodes/html_entity.py index 221040b..5b7607c 100644 --- a/mwparserfromhell/nodes/html_entity.py +++ b/mwparserfromhell/nodes/html_entity.py @@ -135,7 +135,10 @@ class HTMLEntity(Node): @hex_char.setter def hex_char(self, newval): - self._hex_char = bool(newval) + newval = str(newval) + if newval not in ("x", "X"): + raise ValueError(newval) + self._hex_char = newval def normalize(self): """Return the unicode character represented by the HTML entity.""" diff --git a/mwparserfromhell/nodes/text.py b/mwparserfromhell/nodes/text.py index 60ba847..6fda3da 100644 --- a/mwparserfromhell/nodes/text.py +++ b/mwparserfromhell/nodes/text.py @@ -29,6 +29,7 @@ __all__ = ["Text"] class Text(Node): """Represents ordinary, unformatted text with no special properties.""" + def __init__(self, value): super(Text, self).__init__() self._value = value diff --git a/mwparserfromhell/nodes/wikilink.py b/mwparserfromhell/nodes/wikilink.py index f880016..6fea468 100644 --- a/mwparserfromhell/nodes/wikilink.py +++ b/mwparserfromhell/nodes/wikilink.py @@ -30,6 +30,7 @@ __all__ = ["Wikilink"] class Wikilink(Node): """Represents an internal wikilink, like ``[[Foo|Bar]]``.""" + def __init__(self, title, text=None): super(Wikilink, self).__init__() self._title = title diff --git a/mwparserfromhell/parser/contexts.py b/mwparserfromhell/parser/contexts.py index b65946c..896d137 100644 --- a/mwparserfromhell/parser/contexts.py +++ b/mwparserfromhell/parser/contexts.py @@ -62,6 +62,15 @@ Local (stack-specific) contexts: * :py:const:`COMMENT` +* :py:const:`SAFETY_CHECK` + + * :py:const:`HAS_TEXT` + * :py:const:`FAIL_ON_TEXT` + * :py:const:`FAIL_NEXT` + * :py:const:`FAIL_ON_LBRACE` + * :py:const:`FAIL_ON_RBRACE` + * :py:const:`FAIL_ON_EQUALS` + Global contexts: * :py:const:`GL_HEADING` @@ -69,29 +78,36 @@ Global contexts: # Local contexts: -TEMPLATE = 0b00000000000111 -TEMPLATE_NAME = 0b00000000000001 -TEMPLATE_PARAM_KEY = 0b00000000000010 -TEMPLATE_PARAM_VALUE = 0b00000000000100 - -ARGUMENT = 0b00000000011000 -ARGUMENT_NAME = 0b00000000001000 -ARGUMENT_DEFAULT = 0b00000000010000 - -WIKILINK = 0b00000001100000 -WIKILINK_TITLE = 0b00000000100000 -WIKILINK_TEXT = 0b00000001000000 - -HEADING = 0b01111110000000 -HEADING_LEVEL_1 = 0b00000010000000 -HEADING_LEVEL_2 = 0b00000100000000 -HEADING_LEVEL_3 = 0b00001000000000 -HEADING_LEVEL_4 = 
0b00010000000000 -HEADING_LEVEL_5 = 0b00100000000000 -HEADING_LEVEL_6 = 0b01000000000000 - -COMMENT = 0b10000000000000 - +TEMPLATE = 0b00000000000000000111 +TEMPLATE_NAME = 0b00000000000000000001 +TEMPLATE_PARAM_KEY = 0b00000000000000000010 +TEMPLATE_PARAM_VALUE = 0b00000000000000000100 + +ARGUMENT = 0b00000000000000011000 +ARGUMENT_NAME = 0b00000000000000001000 +ARGUMENT_DEFAULT = 0b00000000000000010000 + +WIKILINK = 0b00000000000001100000 +WIKILINK_TITLE = 0b00000000000000100000 +WIKILINK_TEXT = 0b00000000000001000000 + +HEADING = 0b00000001111110000000 +HEADING_LEVEL_1 = 0b00000000000010000000 +HEADING_LEVEL_2 = 0b00000000000100000000 +HEADING_LEVEL_3 = 0b00000000001000000000 +HEADING_LEVEL_4 = 0b00000000010000000000 +HEADING_LEVEL_5 = 0b00000000100000000000 +HEADING_LEVEL_6 = 0b00000001000000000000 + +COMMENT = 0b00000010000000000000 + +SAFETY_CHECK = 0b11111100000000000000 +HAS_TEXT = 0b00000100000000000000 +FAIL_ON_TEXT = 0b00001000000000000000 +FAIL_NEXT = 0b00010000000000000000 +FAIL_ON_LBRACE = 0b00100000000000000000 +FAIL_ON_RBRACE = 0b01000000000000000000 +FAIL_ON_EQUALS = 0b10000000000000000000 # Global contexts: diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 09649a7..d3abb22 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -1,6 +1,6 @@ /* Tokenizer for MWParserFromHell -Copyright (C) 2012 Ben Kurtovic +Copyright (C) 2012-2013 Ben Kurtovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in @@ -843,7 +843,8 @@ Tokenizer_handle_heading_end(Tokenizer* self) self->head++; } current = log2(self->topstack->context / LC_HEADING_LEVEL_1) + 1; - level = current > best ? (best > 6 ? 6 : best) : (current > 6 ? 6 : current); + level = current > best ? (best > 6 ? 6 : best) : + (current > 6 ? 6 : current); after = (HeadingData*) Tokenizer_parse(self, self->topstack->context); if (BAD_ROUTE) { RESET_ROUTE(); @@ -956,11 +957,11 @@ Tokenizer_really_parse_entity(Tokenizer* self) else numeric = hexadecimal = 0; if (hexadecimal) - valid = "0123456789abcdefABCDEF"; + valid = HEXDIGITS; else if (numeric) - valid = "0123456789"; + valid = DIGITS; else - valid = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; + valid = ALPHANUM; text = calloc(MAX_ENTITY_SIZE, sizeof(char)); if (!text) { PyErr_NoMemory(); @@ -1005,7 +1006,7 @@ Tokenizer_really_parse_entity(Tokenizer* self) i = 0; while (1) { def = entitydefs[i]; - if (!def) // We've reached the end of the def list without finding it + if (!def) // We've reached the end of the defs without finding it FAIL_ROUTE_AND_EXIT() if (strcmp(text, def) == 0) break; @@ -1135,48 +1136,59 @@ Tokenizer_parse_comment(Tokenizer* self) } /* - Make sure we are not trying to write an invalid character. + Make sure we are not trying to write an invalid character. Return 0 if + everything is safe, or -1 if the route must be failed. 
*/ -static void +static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) { if (context & LC_FAIL_NEXT) { - Tokenizer_fail_route(self); - return; + return -1; } if (context & LC_WIKILINK_TITLE) { if (data == *"]" || data == *"{") self->topstack->context |= LC_FAIL_NEXT; else if (data == *"\n" || data == *"[" || data == *"}") - Tokenizer_fail_route(self); - return; + return -1; + return 0; } if (context & LC_TEMPLATE_NAME) { if (data == *"{" || data == *"}" || data == *"[") { self->topstack->context |= LC_FAIL_NEXT; - return; + return 0; } if (data == *"]") { - Tokenizer_fail_route(self); - return; + return -1; } if (data == *"|") - return; + return 0; + if (context & LC_HAS_TEXT) { + if (context & LC_FAIL_ON_TEXT) { + if (!Py_UNICODE_ISSPACE(data)) + return -1; + } + else { + if (data == *"\n") + self->topstack->context |= LC_FAIL_ON_TEXT; + } + } + else if (!Py_UNICODE_ISSPACE(data)) + self->topstack->context |= LC_HAS_TEXT; } - else if (context & (LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME)) { + else { if (context & LC_FAIL_ON_EQUALS) { if (data == *"=") { - Tokenizer_fail_route(self); - return; + return -1; } } else if (context & LC_FAIL_ON_LBRACE) { - if (data == *"{") { + if (data == *"{" || (Tokenizer_READ(self, -1) == *"{" && + Tokenizer_READ(self, -2) == *"{")) { if (context & LC_TEMPLATE) self->topstack->context |= LC_FAIL_ON_EQUALS; else self->topstack->context |= LC_FAIL_NEXT; - return; + return 0; } self->topstack->context ^= LC_FAIL_ON_LBRACE; } @@ -1186,7 +1198,7 @@ Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) self->topstack->context |= LC_FAIL_ON_EQUALS; else self->topstack->context |= LC_FAIL_NEXT; - return; + return 0; } self->topstack->context ^= LC_FAIL_ON_RBRACE; } @@ -1195,47 +1207,7 @@ Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) else if (data == *"}") self->topstack->context |= LC_FAIL_ON_RBRACE; } - if (context & LC_HAS_TEXT) { - if (context & LC_FAIL_ON_TEXT) { - if (!Py_UNICODE_ISSPACE(data)) { - if (context & LC_TEMPLATE_PARAM_KEY) { - self->topstack->context ^= LC_FAIL_ON_TEXT; - self->topstack->context |= LC_FAIL_ON_EQUALS; - } - else - Tokenizer_fail_route(self); - return; - } - } - else { - if (data == *"\n") - self->topstack->context |= LC_FAIL_ON_TEXT; - } - } - else if (!Py_UNICODE_ISSPACE(data)) - self->topstack->context |= LC_HAS_TEXT; -} - -/* - Unset any safety-checking contexts set by Tokenizer_verify_safe(). Used - when we preserve a context but previous data becomes invalid, like when - moving between template parameters. 
-*/ -static void -Tokenizer_reset_safety_checks(Tokenizer* self) -{ - static int checks[] = { - LC_HAS_TEXT, LC_FAIL_ON_TEXT, LC_FAIL_NEXT, LC_FAIL_ON_LBRACE, - LC_FAIL_ON_RBRACE, LC_FAIL_ON_EQUALS, 0}; - int context = self->topstack->context, i = 0, this; - while (1) { - this = checks[i]; - if (!this) - return; - if (context & this) - self->topstack->context ^= this; - i++; - } + return 0; } /* @@ -1258,12 +1230,12 @@ Tokenizer_parse(Tokenizer* self, int context) this = Tokenizer_READ(self, 0); this_context = self->topstack->context; if (this_context & unsafe_contexts) { - Tokenizer_verify_safe(self, this_context, this); - if (BAD_ROUTE) { + if (Tokenizer_verify_safe(self, this_context, this) < 0) { if (this_context & LC_TEMPLATE_PARAM_KEY) { trash = Tokenizer_pop(self); Py_XDECREF(trash); } + Tokenizer_fail_route(self); return NULL; } } @@ -1303,7 +1275,6 @@ Tokenizer_parse(Tokenizer* self, int context) self->topstack->context ^= LC_FAIL_NEXT; } else if (this == *"|" && this_context & LC_TEMPLATE) { - Tokenizer_reset_safety_checks(self); if (Tokenizer_handle_template_param(self)) return NULL; } @@ -1324,10 +1295,14 @@ Tokenizer_parse(Tokenizer* self, int context) Tokenizer_write_text(self, this); } else if (this == next && next == *"[") { - if (Tokenizer_parse_wikilink(self)) - return NULL; - if (self->topstack->context & LC_FAIL_NEXT) - self->topstack->context ^= LC_FAIL_NEXT; + if (!(this_context & LC_WIKILINK_TITLE)) { + if (Tokenizer_parse_wikilink(self)) + return NULL; + if (self->topstack->context & LC_FAIL_NEXT) + self->topstack->context ^= LC_FAIL_NEXT; + } + else + Tokenizer_write_text(self, this); } else if (this == *"|" && this_context & LC_WIKILINK_TITLE) { if (Tokenizer_handle_wikilink_separator(self)) @@ -1401,7 +1376,8 @@ Tokenizer_tokenize(Tokenizer* self, PyObject* args) PyMODINIT_FUNC init_tokenizer(void) { - PyObject *module, *tempmodule, *defmap, *deflist, *globals, *locals, *fromlist, *modname; + PyObject *module, *tempmod, *defmap, *deflist, *globals, *locals, + *fromlist, *modname; unsigned numdefs, i; char *name; @@ -1411,14 +1387,16 @@ init_tokenizer(void) module = Py_InitModule("_tokenizer", module_methods); Py_INCREF(&TokenizerType); PyModule_AddObject(module, "CTokenizer", (PyObject*) &TokenizerType); + Py_INCREF(Py_True); + PyDict_SetItemString(TokenizerType.tp_dict, "USES_C", Py_True); - tempmodule = PyImport_ImportModule("htmlentitydefs"); - if (!tempmodule) + tempmod = PyImport_ImportModule("htmlentitydefs"); + if (!tempmod) return; - defmap = PyObject_GetAttrString(tempmodule, "entitydefs"); + defmap = PyObject_GetAttrString(tempmod, "entitydefs"); if (!defmap) return; - Py_DECREF(tempmodule); + Py_DECREF(tempmod); deflist = PyDict_Keys(defmap); if (!deflist) return; @@ -1442,18 +1420,20 @@ init_tokenizer(void) if (!modname) return; PyList_SET_ITEM(fromlist, 0, modname); - tempmodule = PyImport_ImportModuleLevel(name, globals, locals, fromlist, 0); + tempmod = PyImport_ImportModuleLevel(name, globals, locals, fromlist, 0); Py_DECREF(fromlist); - if (!tempmodule) + if (!tempmod) return; - tokens = PyObject_GetAttrString(tempmodule, "tokens"); - Py_DECREF(tempmodule); + tokens = PyObject_GetAttrString(tempmod, "tokens"); + Py_DECREF(tempmod); Text = PyObject_GetAttrString(tokens, "Text"); TemplateOpen = PyObject_GetAttrString(tokens, "TemplateOpen"); - TemplateParamSeparator = PyObject_GetAttrString(tokens, "TemplateParamSeparator"); - TemplateParamEquals = PyObject_GetAttrString(tokens, "TemplateParamEquals"); + TemplateParamSeparator = 
PyObject_GetAttrString(tokens, + "TemplateParamSeparator"); + TemplateParamEquals = PyObject_GetAttrString(tokens, + "TemplateParamEquals"); TemplateClose = PyObject_GetAttrString(tokens, "TemplateClose"); ArgumentOpen = PyObject_GetAttrString(tokens, "ArgumentOpen"); diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index 3293a8f..693538c 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -1,6 +1,6 @@ /* Tokenizer Header File for MWParserFromHell -Copyright (C) 2012 Ben Kurtovic +Copyright (C) 2012-2013 Ben Kurtovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in @@ -36,6 +36,10 @@ SOFTWARE. #define malloc PyObject_Malloc #define free PyObject_Free +#define DIGITS "0123456789" +#define HEXDIGITS "0123456789abcdefABCDEF" +#define ALPHANUM "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + static const char* MARKERS[] = { "{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":", "/", "-", "!", "\n", ""}; @@ -118,6 +122,7 @@ static PyObject* TagCloseClose; #define LC_COMMENT 0x02000 +#define LC_SAFETY_CHECK 0xFC000 #define LC_HAS_TEXT 0x04000 #define LC_FAIL_ON_TEXT 0x08000 #define LC_FAIL_NEXT 0x10000 @@ -205,8 +210,7 @@ static HeadingData* Tokenizer_handle_heading_end(Tokenizer*); static int Tokenizer_really_parse_entity(Tokenizer*); static int Tokenizer_parse_entity(Tokenizer*); static int Tokenizer_parse_comment(Tokenizer*); -static void Tokenizer_verify_safe(Tokenizer*, int, Py_UNICODE); -static void Tokenizer_reset_safety_checks(Tokenizer*); +static int Tokenizer_verify_safe(Tokenizer*, int, Py_UNICODE); static PyObject* Tokenizer_parse(Tokenizer*, int); static PyObject* Tokenizer_tokenize(Tokenizer*, PyObject*); diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index c02e353..f995937 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -37,6 +37,7 @@ class BadRoute(Exception): class Tokenizer(object): """Creates a list of tokens from a string of wikicode.""" + USES_C = False START = object() END = object() MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":", @@ -212,28 +213,9 @@ class Tokenizer(object): self._write_all(argument) self._write(tokens.ArgumentClose()) - def _verify_safe(self, unsafes, strip=True): - """Verify that there are no unsafe characters in the current stack. - - The route will be failed if the name contains any element of *unsafes* - in it. This is used when parsing template names, parameter keys, and so - on, which cannot contain newlines and some other characters. If *strip* - is ``True``, the text will be stripped of whitespace, since this is - allowed at the ends of certain elements but not between text. 
- """ - self._push_textbuffer() - if self._stack: - text = [tok for tok in self._stack if isinstance(tok, tokens.Text)] - text = "".join([token.text for token in text]) - if strip: - text = text.strip() - if text and any([unsafe in text for unsafe in unsafes]): - self._fail_route() - def _handle_template_param(self): """Handle a template parameter at the head of the string.""" if self._context & contexts.TEMPLATE_NAME: - self._verify_safe(["\n", "{", "}", "[", "]"]) self._context ^= contexts.TEMPLATE_NAME elif self._context & contexts.TEMPLATE_PARAM_VALUE: self._context ^= contexts.TEMPLATE_PARAM_VALUE @@ -245,11 +227,6 @@ class Tokenizer(object): def _handle_template_param_value(self): """Handle a template parameter's value at the head of the string.""" - try: - self._verify_safe(["\n", "{{", "}}"]) - except BadRoute: - self._pop() - raise self._write_all(self._pop(keep_context=True)) self._context ^= contexts.TEMPLATE_PARAM_KEY self._context |= contexts.TEMPLATE_PARAM_VALUE @@ -257,24 +234,19 @@ class Tokenizer(object): def _handle_template_end(self): """Handle the end of a template at the head of the string.""" - if self._context & contexts.TEMPLATE_NAME: - self._verify_safe(["\n", "{", "}", "[", "]"]) - elif self._context & contexts.TEMPLATE_PARAM_KEY: + if self._context & contexts.TEMPLATE_PARAM_KEY: self._write_all(self._pop(keep_context=True)) self._head += 1 return self._pop() def _handle_argument_separator(self): """Handle the separator between an argument's name and default.""" - self._verify_safe(["\n", "{{", "}}"]) self._context ^= contexts.ARGUMENT_NAME self._context |= contexts.ARGUMENT_DEFAULT self._write(tokens.ArgumentSeparator()) def _handle_argument_end(self): """Handle the end of an argument at the head of the string.""" - if self._context & contexts.ARGUMENT_NAME: - self._verify_safe(["\n", "{{", "}}"]) self._head += 2 return self._pop() @@ -294,15 +266,12 @@ class Tokenizer(object): def _handle_wikilink_separator(self): """Handle the separator between a wikilink's title and its text.""" - self._verify_safe(["\n", "{", "}", "[", "]"], strip=False) self._context ^= contexts.WIKILINK_TITLE self._context |= contexts.WIKILINK_TEXT self._write(tokens.WikilinkSeparator()) def _handle_wikilink_end(self): """Handle the end of a wikilink at the head of the string.""" - if self._context & contexts.WIKILINK_TITLE: - self._verify_safe(["\n", "{", "}", "[", "]"], strip=False) self._head += 1 return self._pop() @@ -423,11 +392,73 @@ class Tokenizer(object): self._write(tokens.CommentEnd()) self._head += 2 + def _verify_safe(self, this): + """Make sure we are not trying to write an invalid character.""" + context = self._context + if context & contexts.FAIL_NEXT: + return False + if context & contexts.WIKILINK_TITLE: + if this == "]" or this == "{": + self._context |= contexts.FAIL_NEXT + elif this == "\n" or this == "[" or this == "}": + return False + return True + if context & contexts.TEMPLATE_NAME: + if this == "{" or this == "}" or this == "[": + self._context |= contexts.FAIL_NEXT + return True + if this == "]": + return False + if this == "|": + return True + if context & contexts.HAS_TEXT: + if context & contexts.FAIL_ON_TEXT: + if this is self.END or not this.isspace(): + return False + else: + if this == "\n": + self._context |= contexts.FAIL_ON_TEXT + elif this is not self.END or not this.isspace(): + self._context |= contexts.HAS_TEXT + return True + else: + if context & contexts.FAIL_ON_EQUALS: + if this == "=": + return False + elif context & contexts.FAIL_ON_LBRACE: + 
if this == "{" or (self._read(-1) == self._read(-2) == "{"): + if context & contexts.TEMPLATE: + self._context |= contexts.FAIL_ON_EQUALS + else: + self._context |= contexts.FAIL_NEXT + return True + self._context ^= contexts.FAIL_ON_LBRACE + elif context & contexts.FAIL_ON_RBRACE: + if this == "}": + if context & contexts.TEMPLATE: + self._context |= contexts.FAIL_ON_EQUALS + else: + self._context |= contexts.FAIL_NEXT + return True + self._context ^= contexts.FAIL_ON_RBRACE + elif this == "{": + self._context |= contexts.FAIL_ON_LBRACE + elif this == "}": + self._context |= contexts.FAIL_ON_RBRACE + return True + def _parse(self, context=0): """Parse the wikicode string, using *context* for when to stop.""" self._push(context) while True: this = self._read() + unsafe = (contexts.TEMPLATE_NAME | contexts.WIKILINK_TITLE | + contexts.TEMPLATE_PARAM_KEY | contexts.ARGUMENT_NAME) + if self._context & unsafe: + if not self._verify_safe(this): + if self._context & contexts.TEMPLATE_PARAM_KEY: + self._pop() + self._fail_route() if this not in self.MARKERS: self._write_text(this) self._head += 1 @@ -449,6 +480,8 @@ class Tokenizer(object): self._write_text(this) elif this == next == "{": self._parse_template_or_argument() + if self._context & contexts.FAIL_NEXT: + self._context ^= contexts.FAIL_NEXT elif this == "|" and self._context & contexts.TEMPLATE: self._handle_template_param() elif this == "=" and self._context & contexts.TEMPLATE_PARAM_KEY: @@ -465,6 +498,8 @@ class Tokenizer(object): elif this == next == "[": if not self._context & contexts.WIKILINK_TITLE: self._parse_wikilink() + if self._context & contexts.FAIL_NEXT: + self._context ^= contexts.FAIL_NEXT else: self._write_text("[") elif this == "|" and self._context & contexts.WIKILINK_TITLE: diff --git a/mwparserfromhell/smart_list.py b/mwparserfromhell/smart_list.py index 625307f..09b7bbb 100644 --- a/mwparserfromhell/smart_list.py +++ b/mwparserfromhell/smart_list.py @@ -41,8 +41,23 @@ def inheritdoc(method): method.__doc__ = getattr(list, method.__name__).__doc__ return method +class _SliceNormalizerMixIn(object): + """MixIn that provides a private method to normalize slices.""" -class SmartList(list): + def _normalize_slice(self, key): + """Return a slice equivalent to the input *key*, standardized.""" + if key.start is not None: + start = (len(self) + key.start) if key.start < 0 else key.start + else: + start = 0 + if key.stop is not None: + stop = (len(self) + key.stop) if key.stop < 0 else key.stop + else: + stop = maxsize + return slice(start, stop, key.step or 1) + + +class SmartList(_SliceNormalizerMixIn, list): """Implements the ``list`` interface with special handling of sublists. 
When a sublist is created (by ``list[i:j]``), any changes made to this @@ -76,7 +91,8 @@ class SmartList(list): def __getitem__(self, key): if not isinstance(key, slice): return super(SmartList, self).__getitem__(key) - sliceinfo = [key.start or 0, key.stop or 0, key.step or 1] + key = self._normalize_slice(key) + sliceinfo = [key.start, key.stop, key.step] child = _ListProxy(self, sliceinfo) self._children[id(child)] = (child, sliceinfo) return child @@ -86,25 +102,28 @@ class SmartList(list): return super(SmartList, self).__setitem__(key, item) item = list(item) super(SmartList, self).__setitem__(key, item) - diff = len(item) - key.stop + key.start + key = self._normalize_slice(key) + diff = len(item) + (key.start - key.stop) // key.step values = self._children.values if py3k else self._children.itervalues if diff: for child, (start, stop, step) in values(): - if start >= key.stop: + if start > key.stop: self._children[id(child)][1][0] += diff if stop >= key.stop and stop != maxsize: self._children[id(child)][1][1] += diff def __delitem__(self, key): super(SmartList, self).__delitem__(key) - if not isinstance(key, slice): - key = slice(key, key + 1) - diff = key.stop - key.start + if isinstance(key, slice): + key = self._normalize_slice(key) + else: + key = slice(key, key + 1, 1) + diff = (key.stop - key.start) // key.step values = self._children.values if py3k else self._children.itervalues for child, (start, stop, step) in values(): if start > key.start: self._children[id(child)][1][0] -= diff - if stop >= key.stop: + if stop >= key.stop and stop != maxsize: self._children[id(child)][1][1] -= diff if not py3k: @@ -160,24 +179,35 @@ class SmartList(list): child._parent = copy super(SmartList, self).reverse() - @inheritdoc - def sort(self, cmp=None, key=None, reverse=None): - copy = list(self) - for child in self._children: - child._parent = copy - if cmp is not None: + if py3k: + @inheritdoc + def sort(self, key=None, reverse=None): + copy = list(self) + for child in self._children: + child._parent = copy + kwargs = {} if key is not None: - if reverse is not None: - super(SmartList, self).sort(cmp, key, reverse) - else: - super(SmartList, self).sort(cmp, key) - else: - super(SmartList, self).sort(cmp) - else: - super(SmartList, self).sort() + kwargs["key"] = key + if reverse is not None: + kwargs["reverse"] = reverse + super(SmartList, self).sort(**kwargs) + else: + @inheritdoc + def sort(self, cmp=None, key=None, reverse=None): + copy = list(self) + for child in self._children: + child._parent = copy + kwargs = {} + if cmp is not None: + kwargs["cmp"] = cmp + if key is not None: + kwargs["key"] = key + if reverse is not None: + kwargs["reverse"] = reverse + super(SmartList, self).sort(**kwargs) -class _ListProxy(list): +class _ListProxy(_SliceNormalizerMixIn, list): """Implement the ``list`` interface by getting elements from a parent. This is created by a :py:class:`~.SmartList` object when slicing. 
It does @@ -231,25 +261,52 @@ class _ListProxy(list): return bool(self._render()) def __len__(self): - return (self._stop - self._start) / self._step + return (self._stop - self._start) // self._step def __getitem__(self, key): - return self._render()[key] + if isinstance(key, slice): + key = self._normalize_slice(key) + if key.stop == maxsize: + keystop = self._stop + else: + keystop = key.stop + self._start + adjusted = slice(key.start + self._start, keystop, key.step) + return self._parent[adjusted] + else: + return self._render()[key] def __setitem__(self, key, item): if isinstance(key, slice): - adjusted = slice(key.start + self._start, key.stop + self._stop, - key.step) + key = self._normalize_slice(key) + if key.stop == maxsize: + keystop = self._stop + else: + keystop = key.stop + self._start + adjusted = slice(key.start + self._start, keystop, key.step) self._parent[adjusted] = item else: + length = len(self) + if key < 0: + key = length + key + if key < 0 or key >= length: + raise IndexError("list assignment index out of range") self._parent[self._start + key] = item def __delitem__(self, key): if isinstance(key, slice): - adjusted = slice(key.start + self._start, key.stop + self._stop, - key.step) + key = self._normalize_slice(key) + if key.stop == maxsize: + keystop = self._stop + else: + keystop = key.stop + self._start + adjusted = slice(key.start + self._start, keystop, key.step) del self._parent[adjusted] else: + length = len(self) + if key < 0: + key = length + key + if key < 0 or key >= length: + raise IndexError("list assignment index out of range") del self._parent[self._start + key] def __iter__(self): @@ -287,6 +344,16 @@ class _ListProxy(list): self.extend(other) return self + def __mul__(self, other): + return SmartList(list(self) * other) + + def __rmul__(self, other): + return SmartList(other * list(self)) + + def __imul__(self, other): + self.extend(list(self) * (other - 1)) + return self + @property def _start(self): """The starting index of this list, inclusive.""" @@ -295,6 +362,8 @@ class _ListProxy(list): @property def _stop(self): """The ending index of this list, exclusive.""" + if self._sliceinfo[1] == maxsize: + return len(self._parent) return self._sliceinfo[1] @property @@ -328,18 +397,25 @@ class _ListProxy(list): @inheritdoc def insert(self, index, item): + if index < 0: + index = len(self) + index self._parent.insert(self._start + index, item) @inheritdoc def pop(self, index=None): + length = len(self) if index is None: - index = len(self) - 1 + index = length - 1 + elif index < 0: + index = length + index + if index < 0 or index >= length: + raise IndexError("pop index out of range") return self._parent.pop(self._start + index) @inheritdoc def remove(self, item): index = self.index(item) - del self._parent[index] + del self._parent[self._start + index] @inheritdoc def reverse(self): @@ -347,17 +423,30 @@ class _ListProxy(list): item.reverse() self._parent[self._start:self._stop:self._step] = item - @inheritdoc - def sort(self, cmp=None, key=None, reverse=None): - item = self._render() - if cmp is not None: + if py3k: + @inheritdoc + def sort(self, key=None, reverse=None): + item = self._render() + kwargs = {} if key is not None: - if reverse is not None: - item.sort(cmp, key, reverse) - else: - item.sort(cmp, key) - else: - item.sort(cmp) - else: - item.sort() - self._parent[self._start:self._stop:self._step] = item + kwargs["key"] = key + if reverse is not None: + kwargs["reverse"] = reverse + item.sort(**kwargs) + 
self._parent[self._start:self._stop:self._step] = item + else: + @inheritdoc + def sort(self, cmp=None, key=None, reverse=None): + item = self._render() + kwargs = {} + if cmp is not None: + kwargs["cmp"] = cmp + if key is not None: + kwargs["key"] = key + if reverse is not None: + kwargs["reverse"] = reverse + item.sort(**kwargs) + self._parent[self._start:self._stop:self._step] = item + + +del inheritdoc diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index d7a0749..6bee9c4 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -114,6 +114,9 @@ class StringMixIn(object): def __getitem__(self, key): return self.__unicode__()[key] + def __reversed__(self): + return reversed(self.__unicode__()) + def __contains__(self, item): if isinstance(item, StringMixIn): return str(item) in self.__unicode__() @@ -123,6 +126,11 @@ class StringMixIn(object): def capitalize(self): return self.__unicode__().capitalize() + if py3k: + @inheritdoc + def casefold(self): + return self.__unicode__().casefold() + @inheritdoc def center(self, width, fillchar=None): if fillchar is None: @@ -136,19 +144,21 @@ class StringMixIn(object): if not py3k: @inheritdoc def decode(self, encoding=None, errors=None): - if errors is None: - if encoding is None: - return self.__unicode__().decode() - return self.__unicode__().decode(encoding) - return self.__unicode__().decode(encoding, errors) + kwargs = {} + if encoding is not None: + kwargs["encoding"] = encoding + if errors is not None: + kwargs["errors"] = errors + return self.__unicode__().decode(**kwargs) @inheritdoc def encode(self, encoding=None, errors=None): - if errors is None: - if encoding is None: - return self.__unicode__().encode() - return self.__unicode__().encode(encoding) - return self.__unicode__().encode(encoding, errors) + kwargs = {} + if encoding is not None: + kwargs["encoding"] = encoding + if errors is not None: + kwargs["errors"] = errors + return self.__unicode__().encode(**kwargs) @inheritdoc def endswith(self, prefix, start=None, end=None): @@ -168,6 +178,11 @@ class StringMixIn(object): def format(self, *args, **kwargs): return self.__unicode__().format(*args, **kwargs) + if py3k: + @inheritdoc + def format_map(self, mapping): + return self.__unicode__().format_map(mapping) + @inheritdoc def index(self, sub, start=None, end=None): return self.__unicode__().index(sub, start, end) @@ -188,6 +203,11 @@ class StringMixIn(object): def isdigit(self): return self.__unicode__().isdigit() + if py3k: + @inheritdoc + def isidentifier(self): + return self.__unicode__().isidentifier() + @inheritdoc def islower(self): return self.__unicode__().islower() @@ -196,6 +216,11 @@ class StringMixIn(object): def isnumeric(self): return self.__unicode__().isnumeric() + if py3k: + @inheritdoc + def isprintable(self): + return self.__unicode__().isprintable() + @inheritdoc def isspace(self): return self.__unicode__().isspace() @@ -226,12 +251,24 @@ class StringMixIn(object): def lstrip(self, chars=None): return self.__unicode__().lstrip(chars) + if py3k: + @staticmethod + @inheritdoc + def maketrans(self, x, y=None, z=None): + if z is None: + if y is None: + return self.__unicode__.maketrans(x) + return self.__unicode__.maketrans(x, y) + return self.__unicode__.maketrans(x, y, z) + @inheritdoc def partition(self, sep): return self.__unicode__().partition(sep) @inheritdoc - def replace(self, old, new, count): + def replace(self, old, new, count=None): + if count is None: + return 
self.__unicode__().replace(old, new) return self.__unicode__().replace(old, new, count) @inheritdoc @@ -252,25 +289,45 @@ class StringMixIn(object): def rpartition(self, sep): return self.__unicode__().rpartition(sep) - @inheritdoc - def rsplit(self, sep=None, maxsplit=None): - if maxsplit is None: - if sep is None: - return self.__unicode__().rsplit() - return self.__unicode__().rsplit(sep) - return self.__unicode__().rsplit(sep, maxsplit) + if py3k: + @inheritdoc + def rsplit(self, sep=None, maxsplit=None): + kwargs = {} + if sep is not None: + kwargs["sep"] = sep + if maxsplit is not None: + kwargs["maxsplit"] = maxsplit + return self.__unicode__().rsplit(**kwargs) + else: + @inheritdoc + def rsplit(self, sep=None, maxsplit=None): + if maxsplit is None: + if sep is None: + return self.__unicode__().rsplit() + return self.__unicode__().rsplit(sep) + return self.__unicode__().rsplit(sep, maxsplit) @inheritdoc def rstrip(self, chars=None): return self.__unicode__().rstrip(chars) - @inheritdoc - def split(self, sep=None, maxsplit=None): - if maxsplit is None: - if sep is None: - return self.__unicode__().split() - return self.__unicode__().split(sep) - return self.__unicode__().split(sep, maxsplit) + if py3k: + @inheritdoc + def split(self, sep=None, maxsplit=None): + kwargs = {} + if sep is not None: + kwargs["sep"] = sep + if maxsplit is not None: + kwargs["maxsplit"] = maxsplit + return self.__unicode__().split(**kwargs) + else: + @inheritdoc + def split(self, sep=None, maxsplit=None): + if maxsplit is None: + if sep is None: + return self.__unicode__().split() + return self.__unicode__().split(sep) + return self.__unicode__().split(sep, maxsplit) @inheritdoc def splitlines(self, keepends=None): diff --git a/setup.py b/setup.py index 445473e..8b4ae86 100644 --- a/setup.py +++ b/setup.py @@ -24,6 +24,7 @@ from setuptools import setup, find_packages, Extension from mwparserfromhell import __version__ +from mwparserfromhell.compat import py3k with open("README.rst") as fp: long_docs = fp.read() @@ -37,7 +38,7 @@ tokenizer = Extension("mwparserfromhell.parser._tokenizer", setup( name = "mwparserfromhell", packages = find_packages(exclude=("tests",)), - ext_modules = [tokenizer], + ext_modules = [] if py3k else [tokenizer], test_suite = "tests", version = __version__, author = "Ben Kurtovic", diff --git a/tests/MWPFHTestCase.tmlanguage b/tests/MWPFHTestCase.tmlanguage new file mode 100644 index 0000000..e6ea7f0 --- /dev/null +++ b/tests/MWPFHTestCase.tmlanguage @@ -0,0 +1,130 @@ + + + + + fileTypes + + mwtest + + name + MWParserFromHell Test Case + patterns + + + match + --- + name + markup.heading.divider.mwpfh + + + captures + + 1 + + name + keyword.other.name.mwpfh + + 2 + + name + variable.other.name.mwpfh + + + match + (name:)\s*(\w*) + name + meta.name.mwpfh + + + captures + + 1 + + name + keyword.other.label.mwpfh + + 2 + + name + comment.line.other.label.mwpfh + + + match + (label:)\s*(.*) + name + meta.label.mwpfh + + + captures + + 1 + + name + keyword.other.input.mwpfh + + 2 + + name + string.quoted.double.input.mwpfh + + + match + (input:)\s*(.*) + name + meta.input.mwpfh + + + captures + + 1 + + name + keyword.other.output.mwpfh + + + match + (output:) + name + meta.output.mwpfh + + + captures + + 1 + + name + support.language.token.mwpfh + + + match + (\w+)\s*\( + name + meta.name.token.mwpfh + + + captures + + 1 + + name + variable.parameter.token.mwpfh + + + match + (\w+)\s*(=) + name + meta.name.parameter.token.mwpfh + + + match + ".*?" 
+ name + string.quoted.double.mwpfh + + + scopeName + text.mwpfh + uuid + cd3e2ffa-a57d-4c40-954f-1a2e87ffd638 + + diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py new file mode 100644 index 0000000..379b4fa --- /dev/null +++ b/tests/_test_tokenizer.py @@ -0,0 +1,121 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import print_function, unicode_literals +from os import listdir, path + +from mwparserfromhell.compat import py3k +from mwparserfromhell.parser import tokens + +class _TestParseError(Exception): + """Raised internally when a test could not be parsed.""" + pass + + +class TokenizerTestCase(object): + """A base test case for tokenizers, whose tests are loaded dynamically. + + Subclassed along with unittest.TestCase to form TestPyTokenizer and + TestCTokenizer. Tests are loaded dynamically from files in the 'tokenizer' + directory. + """ + + @classmethod + def _build_test_method(cls, funcname, data): + """Create and return a method to be treated as a test case method. + + *data* is a dict containing multiple keys: the *input* text to be + tokenized, the expected list of tokens as *output*, and an optional + *label* for the method's docstring. 
+ """ + def inner(self): + expected = data["output"] + actual = self.tokenizer().tokenize(data["input"]) + self.assertEqual(expected, actual) + if not py3k: + inner.__name__ = funcname.encode("utf8") + inner.__doc__ = data["label"] + return inner + + @classmethod + def _load_tests(cls, filename, text): + """Load all tests in *text* from the file *filename*.""" + tests = text.split("\n---\n") + counter = 1 + digits = len(str(len(tests))) + for test in tests: + data = {"name": None, "label": None, "input": None, "output": None} + try: + for line in test.strip().splitlines(): + if line.startswith("name:"): + data["name"] = line[len("name:"):].strip() + elif line.startswith("label:"): + data["label"] = line[len("label:"):].strip() + elif line.startswith("input:"): + raw = line[len("input:"):].strip() + if raw[0] == '"' and raw[-1] == '"': + raw = raw[1:-1] + raw = raw.encode("raw_unicode_escape") + data["input"] = raw.decode("unicode_escape") + elif line.startswith("output:"): + raw = line[len("output:"):].strip() + try: + data["output"] = eval(raw, vars(tokens)) + except Exception as err: + raise _TestParseError(err) + except _TestParseError as err: + if data["name"]: + error = "Could not parse test '{0}' in '{1}':\n\t{2}" + print(error.format(data["name"], filename, err)) + else: + error = "Could not parse a test in '{0}':\n\t{1}" + print(error.format(filename, err)) + continue + if not data["name"]: + error = "A test in '{0}' was ignored because it lacked a name" + print(error.format(filename)) + continue + if data["input"] is None or data["output"] is None: + error = "Test '{0}' in '{1}' was ignored because it lacked an input or an output" + print(error.format(data["name"], filename)) + continue + number = str(counter).zfill(digits) + fname = "test_{0}{1}_{2}".format(filename, number, data["name"]) + meth = cls._build_test_method(fname, data) + setattr(cls, fname, meth) + counter += 1 + + @classmethod + def build(cls): + """Load and install all tests from the 'tokenizer' directory.""" + directory = path.join(path.dirname(__file__), "tokenizer") + extension = ".mwtest" + for filename in listdir(directory): + if not filename.endswith(extension): + continue + with open(path.join(directory, filename), "r") as fp: + text = fp.read() + if not py3k: + text = text.decode("utf8") + cls._load_tests(filename[:0-len(extension)], text) + +TokenizerTestCase.build() diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py new file mode 100644 index 0000000..758a72e --- /dev/null +++ b/tests/_test_tree_equality.py @@ -0,0 +1,113 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import unicode_literals +from unittest import TestCase + +from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, + Tag, Template, Text, Wikilink) +from mwparserfromhell.nodes.extras import Attribute, Parameter +from mwparserfromhell.wikicode import Wikicode + +class TreeEqualityTestCase(TestCase): + """A base test case with support for comparing the equality of node trees. + + This adds a number of type equality functions, for Wikicode, Text, + Templates, and Wikilinks. + """ + + def assertNodeEqual(self, expected, actual): + """Assert that two Nodes have the same type and have the same data.""" + registry = { + Argument: self.assertArgumentNodeEqual, + Comment: self.assertCommentNodeEqual, + Heading: self.assertHeadingNodeEqual, + HTMLEntity: self.assertHTMLEntityNodeEqual, + Tag: self.assertTagNodeEqual, + Template: self.assertTemplateNodeEqual, + Text: self.assertTextNodeEqual, + Wikilink: self.assertWikilinkNodeEqual + } + for nodetype in registry: + if isinstance(expected, nodetype): + self.assertIsInstance(actual, nodetype) + registry[nodetype](expected, actual) + + def assertArgumentNodeEqual(self, expected, actual): + """Assert that two Argument nodes have the same data.""" + self.assertWikicodeEqual(expected.name, actual.name) + if expected.default is not None: + self.assertWikicodeEqual(expected.default, actual.default) + else: + self.assertIs(None, actual.default) + + def assertCommentNodeEqual(self, expected, actual): + """Assert that two Comment nodes have the same data.""" + self.assertWikicodeEqual(expected.contents, actual.contents) + + def assertHeadingNodeEqual(self, expected, actual): + """Assert that two Heading nodes have the same data.""" + self.assertWikicodeEqual(expected.title, actual.title) + self.assertEqual(expected.level, actual.level) + + def assertHTMLEntityNodeEqual(self, expected, actual): + """Assert that two HTMLEntity nodes have the same data.""" + self.assertEqual(expected.value, actual.value) + self.assertIs(expected.named, actual.named) + self.assertIs(expected.hexadecimal, actual.hexadecimal) + self.assertEqual(expected.hex_char, actual.hex_char) + + def assertTagNodeEqual(self, expected, actual): + """Assert that two Tag nodes have the same data.""" + self.fail("Holding this until feature/html_tags is ready.") + + def assertTemplateNodeEqual(self, expected, actual): + """Assert that two Template nodes have the same data.""" + self.assertWikicodeEqual(expected.name, actual.name) + length = len(expected.params) + self.assertEqual(length, len(actual.params)) + for i in range(length): + exp_param = expected.params[i] + act_param = actual.params[i] + self.assertWikicodeEqual(exp_param.name, act_param.name) + self.assertWikicodeEqual(exp_param.value, act_param.value) + self.assertIs(exp_param.showkey, act_param.showkey) + + def assertTextNodeEqual(self, expected, actual): + """Assert that two Text nodes have the same data.""" + self.assertEqual(expected.value, actual.value) + + def assertWikilinkNodeEqual(self, expected, actual): + """Assert that two Wikilink nodes have the same data.""" + self.assertWikicodeEqual(expected.title, actual.title) + if expected.text is not None: + self.assertWikicodeEqual(expected.text, actual.text) + else: + 
self.assertIs(None, actual.text) + + def assertWikicodeEqual(self, expected, actual): + """Assert that two Wikicode objects have the same data.""" + self.assertIsInstance(actual, Wikicode) + length = len(expected.nodes) + self.assertEqual(length, len(actual.nodes)) + for i in range(length): + self.assertNodeEqual(expected.get(i), actual.get(i)) diff --git a/tests/compat.py b/tests/compat.py new file mode 100644 index 0000000..8bed40e --- /dev/null +++ b/tests/compat.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- + +""" +Serves the same purpose as mwparserfromhell.compat, but only for objects +required by unit tests. This avoids unnecessary imports (like urllib) within +the main library. +""" + +from mwparserfromhell.compat import py3k + +if py3k: + range = range + from io import StringIO + from urllib.parse import urlencode + from urllib.request import urlopen + +else: + range = xrange + from StringIO import StringIO + from urllib import urlencode, urlopen diff --git a/tests/test_builder.py b/tests/test_builder.py new file mode 100644 index 0000000..1e578ed --- /dev/null +++ b/tests/test_builder.py @@ -0,0 +1,261 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
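For reference, the ``.mwtest`` files consumed by the loader in tests/_test_tokenizer.py above (the same layout the syntax grammar in tests/MWPFHTestCase.tmlanguage describes) are plain text, with cases separated by ``---`` lines. The entries below are a hypothetical sketch of that layout — the names, labels, and token lists are illustrative only::

    name: basic
    label: sanity check for basic text parsing
    input: "foo bar baz"
    output: [Text(text="foo bar baz")]

    ---

    name: basic_template
    label: a simple template
    input: "{{foo}}"
    output: [TemplateOpen(), Text(text="foo"), TemplateClose()]

_load_tests() turns each such entry into a test_<file><number>_<name> method that tokenizes *input* and asserts the resulting token list equals *output*.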
+ +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, + Tag, Template, Text, Wikilink) +from mwparserfromhell.nodes.extras import Attribute, Parameter +from mwparserfromhell.parser import tokens +from mwparserfromhell.parser.builder import Builder +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.wikicode import Wikicode + +from ._test_tree_equality import TreeEqualityTestCase + +wrap = lambda L: Wikicode(SmartList(L)) + +class TestBuilder(TreeEqualityTestCase): + """Tests for the builder, which turns tokens into Wikicode objects.""" + + def setUp(self): + self.builder = Builder() + + def test_text(self): + """tests for building Text nodes""" + tests = [ + ([tokens.Text(text="foobar")], wrap([Text("foobar")])), + ([tokens.Text(text="fóóbar")], wrap([Text("fóóbar")])), + ([tokens.Text(text="spam"), tokens.Text(text="eggs")], + wrap([Text("spam"), Text("eggs")])), + ] + for test, valid in tests: + self.assertWikicodeEqual(valid, self.builder.build(test)) + + def test_template(self): + """tests for building Template nodes""" + tests = [ + ([tokens.TemplateOpen(), tokens.Text(text="foobar"), + tokens.TemplateClose()], + wrap([Template(wrap([Text("foobar")]))])), + + ([tokens.TemplateOpen(), tokens.Text(text="spam"), + tokens.Text(text="eggs"), tokens.TemplateClose()], + wrap([Template(wrap([Text("spam"), Text("eggs")]))])), + + ([tokens.TemplateOpen(), tokens.Text(text="foo"), + tokens.TemplateParamSeparator(), tokens.Text(text="bar"), + tokens.TemplateClose()], + wrap([Template(wrap([Text("foo")]), params=[ + Parameter(wrap([Text("1")]), wrap([Text("bar")]), + showkey=False)])])), + + ([tokens.TemplateOpen(), tokens.Text(text="foo"), + tokens.TemplateParamSeparator(), tokens.Text(text="bar"), + tokens.TemplateParamEquals(), tokens.Text(text="baz"), + tokens.TemplateClose()], + wrap([Template(wrap([Text("foo")]), params=[ + Parameter(wrap([Text("bar")]), wrap([Text("baz")]))])])), + + ([tokens.TemplateOpen(), tokens.Text(text="foo"), + tokens.TemplateParamSeparator(), tokens.Text(text="bar"), + tokens.TemplateParamEquals(), tokens.Text(text="baz"), + tokens.TemplateParamSeparator(), tokens.Text(text="biz"), + tokens.TemplateParamSeparator(), tokens.Text(text="buzz"), + tokens.TemplateParamSeparator(), tokens.Text(text="3"), + tokens.TemplateParamEquals(), tokens.Text(text="buff"), + tokens.TemplateParamSeparator(), tokens.Text(text="baff"), + tokens.TemplateClose()], + wrap([Template(wrap([Text("foo")]), params=[ + Parameter(wrap([Text("bar")]), wrap([Text("baz")])), + Parameter(wrap([Text("1")]), wrap([Text("biz")]), + showkey=False), + Parameter(wrap([Text("2")]), wrap([Text("buzz")]), + showkey=False), + Parameter(wrap([Text("3")]), wrap([Text("buff")])), + Parameter(wrap([Text("3")]), wrap([Text("baff")]), + showkey=False)])])), + ] + for test, valid in tests: + self.assertWikicodeEqual(valid, self.builder.build(test)) + + def test_argument(self): + """tests for building Argument nodes""" + tests = [ + ([tokens.ArgumentOpen(), tokens.Text(text="foobar"), + tokens.ArgumentClose()], + wrap([Argument(wrap([Text("foobar")]))])), + + ([tokens.ArgumentOpen(), tokens.Text(text="spam"), + tokens.Text(text="eggs"), tokens.ArgumentClose()], + wrap([Argument(wrap([Text("spam"), Text("eggs")]))])), + + ([tokens.ArgumentOpen(), tokens.Text(text="foo"), + tokens.ArgumentSeparator(), tokens.Text(text="bar"), + tokens.ArgumentClose()], + wrap([Argument(wrap([Text("foo")]), wrap([Text("bar")]))])), 
+ + ([tokens.ArgumentOpen(), tokens.Text(text="foo"), + tokens.Text(text="bar"), tokens.ArgumentSeparator(), + tokens.Text(text="baz"), tokens.Text(text="biz"), + tokens.ArgumentClose()], + wrap([Argument(wrap([Text("foo"), Text("bar")]), + wrap([Text("baz"), Text("biz")]))])), + ] + for test, valid in tests: + self.assertWikicodeEqual(valid, self.builder.build(test)) + + def test_wikilink(self): + """tests for building Wikilink nodes""" + tests = [ + ([tokens.WikilinkOpen(), tokens.Text(text="foobar"), + tokens.WikilinkClose()], + wrap([Wikilink(wrap([Text("foobar")]))])), + + ([tokens.WikilinkOpen(), tokens.Text(text="spam"), + tokens.Text(text="eggs"), tokens.WikilinkClose()], + wrap([Wikilink(wrap([Text("spam"), Text("eggs")]))])), + + ([tokens.WikilinkOpen(), tokens.Text(text="foo"), + tokens.WikilinkSeparator(), tokens.Text(text="bar"), + tokens.WikilinkClose()], + wrap([Wikilink(wrap([Text("foo")]), wrap([Text("bar")]))])), + + ([tokens.WikilinkOpen(), tokens.Text(text="foo"), + tokens.Text(text="bar"), tokens.WikilinkSeparator(), + tokens.Text(text="baz"), tokens.Text(text="biz"), + tokens.WikilinkClose()], + wrap([Wikilink(wrap([Text("foo"), Text("bar")]), + wrap([Text("baz"), Text("biz")]))])), + ] + for test, valid in tests: + self.assertWikicodeEqual(valid, self.builder.build(test)) + + def test_html_entity(self): + """tests for building HTMLEntity nodes""" + tests = [ + ([tokens.HTMLEntityStart(), tokens.Text(text="nbsp"), + tokens.HTMLEntityEnd()], + wrap([HTMLEntity("nbsp", named=True, hexadecimal=False)])), + + ([tokens.HTMLEntityStart(), tokens.HTMLEntityNumeric(), + tokens.Text(text="107"), tokens.HTMLEntityEnd()], + wrap([HTMLEntity("107", named=False, hexadecimal=False)])), + + ([tokens.HTMLEntityStart(), tokens.HTMLEntityNumeric(), + tokens.HTMLEntityHex(char="X"), tokens.Text(text="6B"), + tokens.HTMLEntityEnd()], + wrap([HTMLEntity("6B", named=False, hexadecimal=True, + hex_char="X")])), + ] + for test, valid in tests: + self.assertWikicodeEqual(valid, self.builder.build(test)) + + def test_heading(self): + """tests for building Heading nodes""" + tests = [ + ([tokens.HeadingStart(level=2), tokens.Text(text="foobar"), + tokens.HeadingEnd()], + wrap([Heading(wrap([Text("foobar")]), 2)])), + + ([tokens.HeadingStart(level=4), tokens.Text(text="spam"), + tokens.Text(text="eggs"), tokens.HeadingEnd()], + wrap([Heading(wrap([Text("spam"), Text("eggs")]), 4)])), + ] + for test, valid in tests: + self.assertWikicodeEqual(valid, self.builder.build(test)) + + def test_comment(self): + """tests for building Comment nodes""" + tests = [ + ([tokens.CommentStart(), tokens.Text(text="foobar"), + tokens.CommentEnd()], + wrap([Comment(wrap([Text("foobar")]))])), + + ([tokens.CommentStart(), tokens.Text(text="spam"), + tokens.Text(text="eggs"), tokens.CommentEnd()], + wrap([Comment(wrap([Text("spam"), Text("eggs")]))])), + ] + for test, valid in tests: + self.assertWikicodeEqual(valid, self.builder.build(test)) + + @unittest.skip("holding this until feature/html_tags is ready") + def test_tag(self): + """tests for building Tag nodes""" + pass + + def test_integration(self): + """a test for building a combination of templates together""" + # {{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}} + test = [tokens.TemplateOpen(), tokens.TemplateOpen(), + tokens.TemplateOpen(), tokens.TemplateOpen(), + tokens.Text(text="foo"), tokens.TemplateClose(), + tokens.Text(text="bar"), tokens.TemplateParamSeparator(), + tokens.Text(text="baz"), tokens.TemplateParamEquals(), + tokens.Text(text="biz"), 
tokens.TemplateClose(), + tokens.Text(text="buzz"), tokens.TemplateClose(), + tokens.Text(text="usr"), tokens.TemplateParamSeparator(), + tokens.TemplateOpen(), tokens.Text(text="bin"), + tokens.TemplateClose(), tokens.TemplateClose()] + valid = wrap( + [Template(wrap([Template(wrap([Template(wrap([Template(wrap([Text( + "foo")])), Text("bar")]), params=[Parameter(wrap([Text("baz")]), + wrap([Text("biz")]))]), Text("buzz")])), Text("usr")]), params=[ + Parameter(wrap([Text("1")]), wrap([Template(wrap([Text("bin")]))]), + showkey=False)])]) + self.assertWikicodeEqual(valid, self.builder.build(test)) + + def test_integration2(self): + """an even more audacious test for building a horrible wikicode mess""" + # {{a|b|{{c|[[d]]{{{e}}}}}}}[[f|{{{g}}}]]{{i|j= }} + test = [tokens.TemplateOpen(), tokens.Text(text="a"), + tokens.TemplateParamSeparator(), tokens.Text(text="b"), + tokens.TemplateParamSeparator(), tokens.TemplateOpen(), + tokens.Text(text="c"), tokens.TemplateParamSeparator(), + tokens.WikilinkOpen(), tokens.Text(text="d"), + tokens.WikilinkClose(), tokens.ArgumentOpen(), + tokens.Text(text="e"), tokens.ArgumentClose(), + tokens.TemplateClose(), tokens.TemplateClose(), + tokens.WikilinkOpen(), tokens.Text(text="f"), + tokens.WikilinkSeparator(), tokens.ArgumentOpen(), + tokens.Text(text="g"), tokens.ArgumentClose(), + tokens.CommentStart(), tokens.Text(text="h"), + tokens.CommentEnd(), tokens.WikilinkClose(), + tokens.TemplateOpen(), tokens.Text(text="i"), + tokens.TemplateParamSeparator(), tokens.Text(text="j"), + tokens.TemplateParamEquals(), tokens.HTMLEntityStart(), + tokens.Text(text="nbsp"), tokens.HTMLEntityEnd(), + tokens.TemplateClose()] + valid = wrap( + [Template(wrap([Text("a")]), params=[Parameter(wrap([Text("1")]), + wrap([Text("b")]), showkey=False), Parameter(wrap([Text("2")]), + wrap([Template(wrap([Text("c")]), params=[Parameter(wrap([Text("1") + ]), wrap([Wikilink(wrap([Text("d")])), Argument(wrap([Text("e")]))] + ), showkey=False)])]), showkey=False)]), Wikilink(wrap([Text("f")] + ), wrap([Argument(wrap([Text("g")])), Comment(wrap([Text("h")]))]) + ), Template(wrap([Text("i")]), params=[Parameter(wrap([Text("j")]), + wrap([HTMLEntity("nbsp", named=True)]))])]) + self.assertWikicodeEqual(valid, self.builder.build(test)) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py new file mode 100644 index 0000000..7a082e8 --- /dev/null +++ b/tests/test_ctokenizer.py @@ -0,0 +1,47 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import unicode_literals +import unittest + +try: + from mwparserfromhell.parser._tokenizer import CTokenizer +except ImportError: + CTokenizer = None + +from ._test_tokenizer import TokenizerTestCase + +@unittest.skipUnless(CTokenizer, "C tokenizer not available") +class TestCTokenizer(TokenizerTestCase, unittest.TestCase): + """Test cases for the C tokenizer.""" + + @classmethod + def setUpClass(cls): + cls.tokenizer = CTokenizer + + def test_uses_c(self): + """make sure the C tokenizer identifies as using a C extension""" + self.assertTrue(CTokenizer.USES_C) + self.assertTrue(CTokenizer().USES_C) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_docs.py b/tests/test_docs.py new file mode 100644 index 0000000..8d95c47 --- /dev/null +++ b/tests/test_docs.py @@ -0,0 +1,131 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import print_function, unicode_literals +import json +import unittest + +import mwparserfromhell +from mwparserfromhell.compat import py3k, str + +from .compat import StringIO, urlencode, urlopen + +class TestDocs(unittest.TestCase): + """Integration test cases for mwparserfromhell's documentation.""" + + def assertPrint(self, input, output): + """Assertion check that *input*, when printed, produces *output*.""" + buff = StringIO() + print(input, end="", file=buff) + buff.seek(0) + self.assertEqual(output, buff.read()) + + def test_readme_1(self): + """test a block of example code in the README""" + text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?" + wikicode = mwparserfromhell.parse(text) + self.assertPrint(wikicode, + "I has a template! 
{{foo|bar|baz|eggs=spam}} See it?") + templates = wikicode.filter_templates() + if py3k: + self.assertPrint(templates, "['{{foo|bar|baz|eggs=spam}}']") + else: + self.assertPrint(templates, "[u'{{foo|bar|baz|eggs=spam}}']") + template = templates[0] + self.assertPrint(template.name, "foo") + if py3k: + self.assertPrint(template.params, "['bar', 'baz', 'eggs=spam']") + else: + self.assertPrint(template.params, "[u'bar', u'baz', u'eggs=spam']") + self.assertPrint(template.get(1).value, "bar") + self.assertPrint(template.get("eggs").value, "spam") + + def test_readme_2(self): + """test a block of example code in the README""" + code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}") + if py3k: + self.assertPrint(code.filter_templates(), + "['{{foo|this {{includes a|template}}}}']") + else: + self.assertPrint(code.filter_templates(), + "[u'{{foo|this {{includes a|template}}}}']") + foo = code.filter_templates()[0] + self.assertPrint(foo.get(1).value, "this {{includes a|template}}") + self.assertPrint(foo.get(1).value.filter_templates()[0], + "{{includes a|template}}") + self.assertPrint(foo.get(1).value.filter_templates()[0].get(1).value, + "template") + + def test_readme_3(self): + """test a block of example code in the README""" + text = "{{foo|{{bar}}={{baz|{{spam}}}}}}" + temps = mwparserfromhell.parse(text).filter_templates(recursive=True) + if py3k: + res = "['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}']" + else: + res = "[u'{{foo|{{bar}}={{baz|{{spam}}}}}}', u'{{bar}}', u'{{baz|{{spam}}}}', u'{{spam}}']" + self.assertPrint(temps, res) + + def test_readme_4(self): + """test a block of example code in the README""" + text = "{{cleanup}} '''Foo''' is a [[bar]]. {{uncategorized}}" + code = mwparserfromhell.parse(text) + for template in code.filter_templates(): + if template.name == "cleanup" and not template.has_param("date"): + template.add("date", "July 2012") + res = "{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{uncategorized}}" + self.assertPrint(code, res) + code.replace("{{uncategorized}}", "{{bar-stub}}") + res = "{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}}" + self.assertPrint(code, res) + if py3k: + res = "['{{cleanup|date=July 2012}}', '{{bar-stub}}']" + else: + res = "[u'{{cleanup|date=July 2012}}', u'{{bar-stub}}']" + self.assertPrint(code.filter_templates(), res) + text = str(code) + res = "{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. 
{{bar-stub}}" + self.assertPrint(text, res) + self.assertEqual(text, code) + + def test_readme_5(self): + """test a block of example code in the README; includes a web call""" + url1 = "http://en.wikipedia.org/w/api.php" + url2 = "http://en.wikipedia.org/w/index.php?title={0}&action=raw" + title = "Test" + data = {"action": "query", "prop": "revisions", "rvlimit": 1, + "rvprop": "content", "format": "json", "titles": title} + try: + raw = urlopen(url1, urlencode(data).encode("utf8")).read() + except IOError: + self.skipTest("cannot continue because of unsuccessful web call") + res = json.loads(raw.decode("utf8")) + text = list(res["query"]["pages"].values())[0]["revisions"][0]["*"] + try: + expected = urlopen(url2.format(title)).read().decode("utf8") + except IOError: + self.skipTest("cannot continue because of unsuccessful web call") + actual = mwparserfromhell.parse(text) + self.assertEqual(expected, actual) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_parameter.py b/tests/test_parameter.py deleted file mode 100644 index 2d5515b..0000000 --- a/tests/test_parameter.py +++ /dev/null @@ -1,119 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2012 Ben Kurtovic -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
- -import unittest - -from mwparserfromhell.parameter import Parameter -from mwparserfromhell.template import Template - -class TestParameter(unittest.TestCase): - def setUp(self): - self.name = "foo" - self.value1 = "bar" - self.value2 = "{{spam}}" - self.value3 = "bar{{spam}}" - self.value4 = "embedded {{eggs|spam|baz=buz}} {{goes}} here" - self.templates2 = [Template("spam")] - self.templates3 = [Template("spam")] - self.templates4 = [Template("eggs", [Parameter("1", "spam"), - Parameter("baz", "buz")]), - Template("goes")] - - def test_construct(self): - Parameter(self.name, self.value1) - Parameter(self.name, self.value2, self.templates2) - Parameter(name=self.name, value=self.value3) - Parameter(name=self.name, value=self.value4, templates=self.templates4) - - def test_name(self): - params = [ - Parameter(self.name, self.value1), - Parameter(self.name, self.value2, self.templates2), - Parameter(name=self.name, value=self.value3), - Parameter(name=self.name, value=self.value4, - templates=self.templates4) - ] - for param in params: - self.assertEqual(param.name, self.name) - - def test_value(self): - tests = [ - (Parameter(self.name, self.value1), self.value1), - (Parameter(self.name, self.value2, self.templates2), self.value2), - (Parameter(name=self.name, value=self.value3), self.value3), - (Parameter(name=self.name, value=self.value4, - templates=self.templates4), self.value4) - ] - for param, correct in tests: - self.assertEqual(param.value, correct) - - def test_templates(self): - tests = [ - (Parameter(self.name, self.value3, self.templates3), - self.templates3), - (Parameter(name=self.name, value=self.value4, - templates=self.templates4), self.templates4) - ] - for param, correct in tests: - self.assertEqual(param.templates, correct) - - def test_magic(self): - params = [Parameter(self.name, self.value1), - Parameter(self.name, self.value2, self.templates2), - Parameter(self.name, self.value3, self.templates3), - Parameter(self.name, self.value4, self.templates4)] - for param in params: - self.assertEqual(repr(param), repr(param.value)) - self.assertEqual(str(param), str(param.value)) - self.assertIs(param < "eggs", param.value < "eggs") - self.assertIs(param <= "bar{{spam}}", param.value <= "bar{{spam}}") - self.assertIs(param == "bar", param.value == "bar") - self.assertIs(param != "bar", param.value != "bar") - self.assertIs(param > "eggs", param.value > "eggs") - self.assertIs(param >= "bar{{spam}}", param.value >= "bar{{spam}}") - self.assertEquals(bool(param), bool(param.value)) - self.assertEquals(len(param), len(param.value)) - self.assertEquals(list(param), list(param.value)) - self.assertEquals(param[2], param.value[2]) - self.assertEquals(list(reversed(param)), - list(reversed(param.value))) - self.assertIs("bar" in param, "bar" in param.value) - self.assertEquals(param + "test", param.value + "test") - self.assertEquals("test" + param, "test" + param.value) - # add param - # add template left - # add template right - - self.assertEquals(param * 3, Parameter(param.name, param.value * 3, - param.templates * 3)) - self.assertEquals(3 * param, Parameter(param.name, 3 * param.value, - 3 * param.templates)) - - # add param inplace - # add template implace - # add str inplace - # multiply int inplace - self.assertIsInstance(param, Parameter) - self.assertIsInstance(param.value, str) - -if __name__ == "__main__": - unittest.main(verbosity=2) diff --git a/tests/test_parser.py b/tests/test_parser.py index 0c989b8..9d2c969 100644 --- a/tests/test_parser.py +++ 
b/tests/test_parser.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -20,44 +20,50 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +from __future__ import unicode_literals import unittest -from mwparserfromhell.parameter import Parameter -from mwparserfromhell.parser import Parser -from mwparserfromhell.template import Template +from mwparserfromhell import parser +from mwparserfromhell.nodes import Template, Text, Wikilink +from mwparserfromhell.nodes.extras import Parameter +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.wikicode import Wikicode -TESTS = [ - ("", []), - ("abcdef ghijhk", []), - ("abc{this is not a template}def", []), - ("neither is {{this one}nor} {this one {despite}} containing braces", []), - ("this is an acceptable {{template}}", [Template("template")]), - ("{{multiple}}{{templates}}", [Template("multiple"), - Template("templates")]), - ("multiple {{-}} templates {{+}}!", [Template("-"), Template("+")]), - ("{{{no templates here}}}", []), - ("{ {{templates here}}}", [Template("templates here")]), - ("{{{{I do not exist}}}}", []), - ("{{foo|bar|baz|eggs=spam}}", - [Template("foo", [Parameter("1", "bar"), Parameter("2", "baz"), - Parameter("eggs", "spam")])]), - ("{{abc def|ghi|jk=lmno|pqr|st=uv|wx|yz}}", - [Template("abc def", [Parameter("1", "ghi"), Parameter("jk", "lmno"), - Parameter("2", "pqr"), Parameter("st", "uv"), - Parameter("3", "wx"), Parameter("4", "yz")])]), - ("{{this has a|{{template}}|inside of it}}", - [Template("this has a", [Parameter("1", "{{template}}", - [Template("template")]), - Parameter("2", "inside of it")])]), - ("{{{{I exist}} }}", [Template("I exist", [] )]), - ("{{}}") -] +from ._test_tree_equality import TreeEqualityTestCase +from .compat import range -class TestParser(unittest.TestCase): - def test_parse(self): - parser = Parser() - for unparsed, parsed in TESTS: - self.assertEqual(parser.parse(unparsed), parsed) +class TestParser(TreeEqualityTestCase): + """Tests for the Parser class itself, which tokenizes and builds nodes.""" + + def test_use_c(self): + """make sure the correct tokenizer is used""" + if parser.use_c: + self.assertTrue(parser.Parser(None)._tokenizer.USES_C) + parser.use_c = False + self.assertFalse(parser.Parser(None)._tokenizer.USES_C) + + def test_parsing(self): + """integration test for parsing overall""" + text = "this is text; {{this|is=a|template={{with|[[links]]|in}}it}}" + wrap = lambda L: Wikicode(SmartList(L)) + expected = wrap([ + Text("this is text; "), + Template(wrap([Text("this")]), [ + Parameter(wrap([Text("is")]), wrap([Text("a")])), + Parameter(wrap([Text("template")]), wrap([ + Template(wrap([Text("with")]), [ + Parameter(wrap([Text("1")]), + wrap([Wikilink(wrap([Text("links")]))]), + showkey=False), + Parameter(wrap([Text("2")]), + wrap([Text("in")]), showkey=False) + ]), + Text("it") + ])) + ]) + ]) + actual = parser.Parser(text).parse() + self.assertWikicodeEqual(expected, actual) if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py new file mode 100644 index 0000000..697c7e5 --- /dev/null +++ b/tests/test_pytokenizer.py @@ -0,0 +1,43 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is 
hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.parser.tokenizer import Tokenizer + +from ._test_tokenizer import TokenizerTestCase + +class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): + """Test cases for the Python tokenizer.""" + + @classmethod + def setUpClass(cls): + cls.tokenizer = Tokenizer + + def test_uses_c(self): + """make sure the Python tokenizer identifies as not using C""" + self.assertFalse(Tokenizer.USES_C) + self.assertFalse(Tokenizer().USES_C) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py new file mode 100644 index 0000000..25df555 --- /dev/null +++ b/tests/test_smart_list.py @@ -0,0 +1,392 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import py3k +from mwparserfromhell.smart_list import SmartList, _ListProxy + +from .compat import range + +class TestSmartList(unittest.TestCase): + """Test cases for the SmartList class and its child, _ListProxy.""" + + def _test_get_set_del_item(self, builder): + """Run tests on __get/set/delitem__ of a list built with *builder*.""" + def assign(L, s1, s2, s3, val): + L[s1:s2:s3] = val + def delete(L, s1): + del L[s1] + + list1 = builder([0, 1, 2, 3, "one", "two"]) + list2 = builder(list(range(10))) + + self.assertEqual(1, list1[1]) + self.assertEqual("one", list1[-2]) + self.assertEqual([2, 3], list1[2:4]) + self.assertRaises(IndexError, lambda: list1[6]) + self.assertRaises(IndexError, lambda: list1[-7]) + + self.assertEqual([0, 1, 2], list1[:3]) + self.assertEqual([0, 1, 2, 3, "one", "two"], list1[:]) + self.assertEqual([3, "one", "two"], list1[3:]) + self.assertEqual(["one", "two"], list1[-2:]) + self.assertEqual([0, 1], list1[:-4]) + self.assertEqual([], list1[6:]) + self.assertEqual([], list1[4:2]) + + self.assertEqual([0, 2, "one"], list1[0:5:2]) + self.assertEqual([0, 2], list1[0:-3:2]) + self.assertEqual([0, 1, 2, 3, "one", "two"], list1[::]) + self.assertEqual([2, 3, "one", "two"], list1[2::]) + self.assertEqual([0, 1, 2, 3], list1[:4:]) + self.assertEqual([2, 3], list1[2:4:]) + self.assertEqual([0, 2, 4, 6, 8], list2[::2]) + self.assertEqual([2, 5, 8], list2[2::3]) + self.assertEqual([0, 3], list2[:6:3]) + self.assertEqual([2, 5, 8], list2[-8:9:3]) + self.assertEqual([], list2[100000:1000:-100]) + + list1[3] = 100 + self.assertEqual(100, list1[3]) + list1[-3] = 101 + self.assertEqual([0, 1, 2, 101, "one", "two"], list1) + list1[5:] = [6, 7, 8] + self.assertEqual([6, 7, 8], list1[5:]) + self.assertEqual([0, 1, 2, 101, "one", 6, 7, 8], list1) + list1[2:4] = [-1, -2, -3, -4, -5] + self.assertEqual([0, 1, -1, -2, -3, -4, -5, "one", 6, 7, 8], list1) + list1[0:-3] = [99] + self.assertEqual([99, 6, 7, 8], list1) + list2[0:6:2] = [100, 102, 104] + self.assertEqual([100, 1, 102, 3, 104, 5, 6, 7, 8, 9], list2) + list2[::3] = [200, 203, 206, 209] + self.assertEqual([200, 1, 102, 203, 104, 5, 206, 7, 8, 209], list2) + list2[::] = range(7) + self.assertEqual([0, 1, 2, 3, 4, 5, 6], list2) + self.assertRaises(ValueError, assign, list2, 0, 5, 2, + [100, 102, 104, 106]) + + del list2[2] + self.assertEqual([0, 1, 3, 4, 5, 6], list2) + del list2[-3] + self.assertEqual([0, 1, 3, 5, 6], list2) + self.assertRaises(IndexError, delete, list2, 100) + self.assertRaises(IndexError, delete, list2, -6) + list2[:] = range(10) + del list2[3:6] + self.assertEqual([0, 1, 2, 6, 7, 8, 9], list2) + del list2[-2:] + self.assertEqual([0, 1, 2, 6, 7], list2) + del list2[:2] + self.assertEqual([2, 6, 7], list2) + list2[:] = range(10) + del list2[2:8:2] + self.assertEqual([0, 1, 3, 5, 7, 8, 9], list2) + + def _test_add_radd_iadd(self, builder): + """Run tests on __r/i/add__ of a list built with *builder*.""" + list1 = builder(range(5)) + list2 = builder(range(5, 10)) + self.assertEqual([0, 1, 2, 3, 4, 5, 6], list1 + [5, 6]) + self.assertEqual([0, 1, 2, 3, 4], list1) + self.assertEqual(list(range(10)), list1 + list2) + self.assertEqual([-2, -1, 0, 1, 2, 3, 4], [-2, -1] + list1) + self.assertEqual([0, 1, 2, 3, 4], list1) + list1 += ["foo", "bar", "baz"] + self.assertEqual([0, 1, 2, 3, 4, "foo", "bar", "baz"], list1) + + def _test_other_magic_methods(self, builder): + """Run tests on other magic methods of a list built with 
*builder*.""" + list1 = builder([0, 1, 2, 3, "one", "two"]) + list2 = builder([]) + list3 = builder([0, 2, 3, 4]) + list4 = builder([0, 1, 2]) + + if py3k: + self.assertEqual("[0, 1, 2, 3, 'one', 'two']", str(list1)) + self.assertEqual(b"\x00\x01\x02", bytes(list4)) + self.assertEqual("[0, 1, 2, 3, 'one', 'two']", repr(list1)) + else: + self.assertEqual("[0, 1, 2, 3, u'one', u'two']", unicode(list1)) + self.assertEqual(b"[0, 1, 2, 3, u'one', u'two']", str(list1)) + self.assertEqual(b"[0, 1, 2, 3, u'one', u'two']", repr(list1)) + + self.assertTrue(list1 < list3) + self.assertTrue(list1 <= list3) + self.assertFalse(list1 == list3) + self.assertTrue(list1 != list3) + self.assertFalse(list1 > list3) + self.assertFalse(list1 >= list3) + + other1 = [0, 2, 3, 4] + self.assertTrue(list1 < other1) + self.assertTrue(list1 <= other1) + self.assertFalse(list1 == other1) + self.assertTrue(list1 != other1) + self.assertFalse(list1 > other1) + self.assertFalse(list1 >= other1) + + other2 = [0, 0, 1, 2] + self.assertFalse(list1 < other2) + self.assertFalse(list1 <= other2) + self.assertFalse(list1 == other2) + self.assertTrue(list1 != other2) + self.assertTrue(list1 > other2) + self.assertTrue(list1 >= other2) + + other3 = [0, 1, 2, 3, "one", "two"] + self.assertFalse(list1 < other3) + self.assertTrue(list1 <= other3) + self.assertTrue(list1 == other3) + self.assertFalse(list1 != other3) + self.assertFalse(list1 > other3) + self.assertTrue(list1 >= other3) + + self.assertTrue(bool(list1)) + self.assertFalse(bool(list2)) + + self.assertEqual(6, len(list1)) + self.assertEqual(0, len(list2)) + + out = [] + for obj in list1: + out.append(obj) + self.assertEqual([0, 1, 2, 3, "one", "two"], out) + + out = [] + for ch in list2: + out.append(ch) + self.assertEqual([], out) + + gen1 = iter(list1) + out = [] + for i in range(len(list1)): + out.append(next(gen1)) + self.assertRaises(StopIteration, next, gen1) + self.assertEqual([0, 1, 2, 3, "one", "two"], out) + gen2 = iter(list2) + self.assertRaises(StopIteration, next, gen2) + + self.assertEqual(["two", "one", 3, 2, 1, 0], list(reversed(list1))) + self.assertEqual([], list(reversed(list2))) + + self.assertTrue("one" in list1) + self.assertTrue(3 in list1) + self.assertFalse(10 in list1) + self.assertFalse(0 in list2) + + self.assertEqual([], list2 * 5) + self.assertEqual([], 5 * list2) + self.assertEqual([0, 1, 2, 0, 1, 2, 0, 1, 2], list4 * 3) + self.assertEqual([0, 1, 2, 0, 1, 2, 0, 1, 2], 3 * list4) + list4 *= 2 + self.assertEqual([0, 1, 2, 0, 1, 2], list4) + + def _test_list_methods(self, builder): + """Run tests on the public methods of a list built with *builder*.""" + list1 = builder(range(5)) + list2 = builder(["foo"]) + list3 = builder([("a", 5), ("d", 2), ("b", 8), ("c", 3)]) + + list1.append(5) + list1.append(1) + list1.append(2) + self.assertEqual([0, 1, 2, 3, 4, 5, 1, 2], list1) + + self.assertEqual(0, list1.count(6)) + self.assertEqual(2, list1.count(1)) + + list1.extend(range(5, 8)) + self.assertEqual([0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) + + self.assertEqual(1, list1.index(1)) + self.assertEqual(6, list1.index(1, 3)) + self.assertEqual(6, list1.index(1, 3, 7)) + self.assertRaises(ValueError, list1.index, 1, 3, 5) + + list1.insert(0, -1) + self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) + list1.insert(-1, 6.5) + self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7], list1) + list1.insert(13, 8) + self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7, 8], list1) + + self.assertEqual(8, list1.pop()) + self.assertEqual(7, 
list1.pop()) + self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5], list1) + self.assertEqual(-1, list1.pop(0)) + self.assertEqual(5, list1.pop(5)) + self.assertEqual(6.5, list1.pop(-1)) + self.assertEqual([0, 1, 2, 3, 4, 1, 2, 5, 6], list1) + self.assertEqual("foo", list2.pop()) + self.assertRaises(IndexError, list2.pop) + self.assertEqual([], list2) + + list1.remove(6) + self.assertEqual([0, 1, 2, 3, 4, 1, 2, 5], list1) + list1.remove(1) + self.assertEqual([0, 2, 3, 4, 1, 2, 5], list1) + list1.remove(1) + self.assertEqual([0, 2, 3, 4, 2, 5], list1) + self.assertRaises(ValueError, list1.remove, 1) + + list1.reverse() + self.assertEqual([5, 2, 4, 3, 2, 0], list1) + + list1.sort() + self.assertEqual([0, 2, 2, 3, 4, 5], list1) + list1.sort(reverse=True) + self.assertEqual([5, 4, 3, 2, 2, 0], list1) + if not py3k: + func = lambda x, y: abs(3 - x) - abs(3 - y) # Distance from 3 + list1.sort(cmp=func) + self.assertEqual([3, 4, 2, 2, 5, 0], list1) + list1.sort(cmp=func, reverse=True) + self.assertEqual([0, 5, 4, 2, 2, 3], list1) + list3.sort(key=lambda i: i[1]) + self.assertEqual([("d", 2), ("c", 3), ("a", 5), ("b", 8)], list3) + list3.sort(key=lambda i: i[1], reverse=True) + self.assertEqual([("b", 8), ("a", 5), ("c", 3), ("d", 2)], list3) + + def test_docs(self): + """make sure the methods of SmartList/_ListProxy have docstrings""" + methods = ["append", "count", "extend", "index", "insert", "pop", + "remove", "reverse", "sort"] + for meth in methods: + expected = getattr(list, meth).__doc__ + smartlist_doc = getattr(SmartList, meth).__doc__ + listproxy_doc = getattr(_ListProxy, meth).__doc__ + self.assertEqual(expected, smartlist_doc) + self.assertEqual(expected, listproxy_doc) + + def test_doctest(self): + """make sure the test embedded in SmartList's docstring passes""" + parent = SmartList([0, 1, 2, 3]) + self.assertEqual([0, 1, 2, 3], parent) + child = parent[2:] + self.assertEqual([2, 3], child) + child.append(4) + self.assertEqual([2, 3, 4], child) + self.assertEqual([0, 1, 2, 3, 4], parent) + + def test_parent_get_set_del(self): + """make sure SmartList's getitem/setitem/delitem work""" + self._test_get_set_del_item(SmartList) + + def test_parent_add(self): + """make sure SmartList's add/radd/iadd work""" + self._test_add_radd_iadd(SmartList) + + def test_parent_unaffected_magics(self): + """sanity checks against SmartList features that were not modified""" + self._test_other_magic_methods(SmartList) + + def test_parent_methods(self): + """make sure SmartList's non-magic methods work, like append()""" + self._test_list_methods(SmartList) + + def test_child_get_set_del(self): + """make sure _ListProxy's getitem/setitem/delitem work""" + self._test_get_set_del_item(lambda L: SmartList(list(L))[:]) + self._test_get_set_del_item(lambda L: SmartList([999] + list(L))[1:]) + self._test_get_set_del_item(lambda L: SmartList(list(L) + [999])[:-1]) + builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] + self._test_get_set_del_item(builder) + + def test_child_add(self): + """make sure _ListProxy's add/radd/iadd work""" + self._test_add_radd_iadd(lambda L: SmartList(list(L))[:]) + self._test_add_radd_iadd(lambda L: SmartList([999] + list(L))[1:]) + self._test_add_radd_iadd(lambda L: SmartList(list(L) + [999])[:-1]) + builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] + self._test_add_radd_iadd(builder) + + def test_child_other_magics(self): + """make sure _ListProxy's other magically implemented features work""" + self._test_other_magic_methods(lambda L: 
SmartList(list(L))[:]) + self._test_other_magic_methods(lambda L: SmartList([999] + list(L))[1:]) + self._test_other_magic_methods(lambda L: SmartList(list(L) + [999])[:-1]) + builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] + self._test_other_magic_methods(builder) + + def test_child_methods(self): + """make sure _ListProxy's non-magic methods work, like append()""" + self._test_list_methods(lambda L: SmartList(list(L))[:]) + self._test_list_methods(lambda L: SmartList([999] + list(L))[1:]) + self._test_list_methods(lambda L: SmartList(list(L) + [999])[:-1]) + builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] + self._test_list_methods(builder) + + def test_influence(self): + """make sure changes are propagated from parents to children""" + parent = SmartList([0, 1, 2, 3, 4, 5]) + child1 = parent[2:] + child2 = parent[2:5] + + parent.append(6) + child1.append(7) + child2.append(4.5) + self.assertEqual([0, 1, 2, 3, 4, 4.5, 5, 6, 7], parent) + self.assertEqual([2, 3, 4, 4.5, 5, 6, 7], child1) + self.assertEqual([2, 3, 4, 4.5], child2) + + parent.insert(0, -1) + parent.insert(4, 2.5) + parent.insert(10, 6.5) + self.assertEqual([-1, 0, 1, 2, 2.5, 3, 4, 4.5, 5, 6, 6.5, 7], parent) + self.assertEqual([2, 2.5, 3, 4, 4.5, 5, 6, 6.5, 7], child1) + self.assertEqual([2, 2.5, 3, 4, 4.5], child2) + + self.assertEqual(7, parent.pop()) + self.assertEqual(6.5, child1.pop()) + self.assertEqual(4.5, child2.pop()) + self.assertEqual([-1, 0, 1, 2, 2.5, 3, 4, 5, 6], parent) + self.assertEqual([2, 2.5, 3, 4, 5, 6], child1) + self.assertEqual([2, 2.5, 3, 4], child2) + + parent.remove(-1) + child1.remove(2.5) + self.assertEqual([0, 1, 2, 3, 4, 5, 6], parent) + self.assertEqual([2, 3, 4, 5, 6], child1) + self.assertEqual([2, 3, 4], child2) + + self.assertEqual(0, parent.pop(0)) + self.assertEqual([1, 2, 3, 4, 5, 6], parent) + self.assertEqual([2, 3, 4, 5, 6], child1) + self.assertEqual([2, 3, 4], child2) + + child2.reverse() + self.assertEqual([1, 4, 3, 2, 5, 6], parent) + self.assertEqual([4, 3, 2, 5, 6], child1) + self.assertEqual([4, 3, 2], child2) + + parent.extend([7, 8]) + child1.extend([8.1, 8.2]) + child2.extend([1.9, 1.8]) + self.assertEqual([1, 4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], parent) + self.assertEqual([4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], child1) + self.assertEqual([4, 3, 2, 1.9, 1.8], child2) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py new file mode 100644 index 0000000..306f2fd --- /dev/null +++ b/tests/test_string_mixin.py @@ -0,0 +1,435 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import unicode_literals +from sys import getdefaultencoding +from types import GeneratorType +import unittest + +from mwparserfromhell.compat import bytes, py3k, str +from mwparserfromhell.string_mixin import StringMixIn + +from .compat import range + +class _FakeString(StringMixIn): + def __init__(self, data): + self._data = data + + def __unicode__(self): + return self._data + + +class TestStringMixIn(unittest.TestCase): + """Test cases for the StringMixIn class.""" + + def test_docs(self): + """make sure the various methods of StringMixIn have docstrings""" + methods = [ + "capitalize", "center", "count", "encode", "endswith", + "expandtabs", "find", "format", "index", "isalnum", "isalpha", + "isdecimal", "isdigit", "islower", "isnumeric", "isspace", + "istitle", "isupper", "join", "ljust", "lower", "lstrip", + "partition", "replace", "rfind", "rindex", "rjust", "rpartition", + "rsplit", "rstrip", "split", "splitlines", "startswith", "strip", + "swapcase", "title", "translate", "upper", "zfill"] + if py3k: + methods.extend(["casefold", "format_map", "isidentifier", + "isprintable", "maketrans"]) + else: + methods.append("decode") + for meth in methods: + expected = getattr(str, meth).__doc__ + actual = getattr(StringMixIn, meth).__doc__ + self.assertEqual(expected, actual) + + def test_types(self): + """make sure StringMixIns convert to different types correctly""" + fstr = _FakeString("fake string") + self.assertEqual(str(fstr), "fake string") + self.assertEqual(bytes(fstr), b"fake string") + if py3k: + self.assertEqual(repr(fstr), "'fake string'") + else: + self.assertEqual(repr(fstr), b"u'fake string'") + + self.assertIsInstance(str(fstr), str) + self.assertIsInstance(bytes(fstr), bytes) + if py3k: + self.assertIsInstance(repr(fstr), str) + else: + self.assertIsInstance(repr(fstr), bytes) + + def test_comparisons(self): + """make sure comparison operators work""" + str1 = _FakeString("this is a fake string") + str2 = _FakeString("this is a fake string") + str3 = _FakeString("fake string, this is") + str4 = "this is a fake string" + str5 = "fake string, this is" + + self.assertFalse(str1 > str2) + self.assertTrue(str1 >= str2) + self.assertTrue(str1 == str2) + self.assertFalse(str1 != str2) + self.assertFalse(str1 < str2) + self.assertTrue(str1 <= str2) + + self.assertTrue(str1 > str3) + self.assertTrue(str1 >= str3) + self.assertFalse(str1 == str3) + self.assertTrue(str1 != str3) + self.assertFalse(str1 < str3) + self.assertFalse(str1 <= str3) + + self.assertFalse(str1 > str4) + self.assertTrue(str1 >= str4) + self.assertTrue(str1 == str4) + self.assertFalse(str1 != str4) + self.assertFalse(str1 < str4) + self.assertTrue(str1 <= str4) + + self.assertTrue(str1 > str5) + self.assertTrue(str1 >= str5) + self.assertFalse(str1 == str5) + self.assertTrue(str1 != str5) + self.assertFalse(str1 < str5) + self.assertFalse(str1 <= str5) + + def test_other_magics(self): + """test other magically implemented features, like len() and iter()""" + str1 = _FakeString("fake string") + str2 = _FakeString("") + expected = ["f", "a", "k", "e", " ", "s", "t", "r", "i", "n", "g"] + + self.assertTrue(str1) + self.assertFalse(str2) + self.assertEqual(11, len(str1)) + self.assertEqual(0, len(str2)) + + out = [] + for ch in 
str1: + out.append(ch) + self.assertEqual(expected, out) + + out = [] + for ch in str2: + out.append(ch) + self.assertEqual([], out) + + gen1 = iter(str1) + gen2 = iter(str2) + self.assertIsInstance(gen1, GeneratorType) + self.assertIsInstance(gen2, GeneratorType) + + out = [] + for i in range(len(str1)): + out.append(next(gen1)) + self.assertRaises(StopIteration, next, gen1) + self.assertEqual(expected, out) + self.assertRaises(StopIteration, next, gen2) + + self.assertEqual("gnirts ekaf", "".join(list(reversed(str1)))) + self.assertEqual([], list(reversed(str2))) + + self.assertEqual("f", str1[0]) + self.assertEqual(" ", str1[4]) + self.assertEqual("g", str1[10]) + self.assertEqual("n", str1[-2]) + self.assertRaises(IndexError, lambda: str1[11]) + self.assertRaises(IndexError, lambda: str2[0]) + + self.assertTrue("k" in str1) + self.assertTrue("fake" in str1) + self.assertTrue("str" in str1) + self.assertTrue("" in str1) + self.assertTrue("" in str2) + self.assertFalse("real" in str1) + self.assertFalse("s" in str2) + + def test_other_methods(self): + """test the remaining non-magic methods of StringMixIn""" + str1 = _FakeString("fake string") + self.assertEqual("Fake string", str1.capitalize()) + + self.assertEqual(" fake string ", str1.center(15)) + self.assertEqual(" fake string ", str1.center(16)) + self.assertEqual("qqfake stringqq", str1.center(15, "q")) + + self.assertEqual(1, str1.count("e")) + self.assertEqual(0, str1.count("z")) + self.assertEqual(1, str1.count("r", 7)) + self.assertEqual(0, str1.count("r", 8)) + self.assertEqual(1, str1.count("r", 5, 9)) + self.assertEqual(0, str1.count("r", 5, 7)) + + if not py3k: + str2 = _FakeString("fo") + self.assertEqual(str1, str1.decode()) + actual = _FakeString("\\U00010332\\U0001033f\\U00010344") + self.assertEqual("𐌲𐌿𐍄", actual.decode("unicode_escape")) + self.assertRaises(UnicodeError, str2.decode, "punycode") + self.assertEqual("", str2.decode("punycode", "ignore")) + + str3 = _FakeString("𐌲𐌿𐍄") + actual = b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84" + self.assertEqual(b"fake string", str1.encode()) + self.assertEqual(actual, str3.encode("utf-8")) + self.assertEqual(actual, str3.encode(encoding="utf-8")) + if getdefaultencoding() == "ascii": + self.assertRaises(UnicodeEncodeError, str3.encode) + elif getdefaultencoding() == "utf-8": + self.assertEqual(actual, str3.encode()) + self.assertRaises(UnicodeEncodeError, str3.encode, "ascii") + self.assertRaises(UnicodeEncodeError, str3.encode, "ascii", "strict") + if getdefaultencoding() == "ascii": + self.assertRaises(UnicodeEncodeError, str3.encode, errors="strict") + elif getdefaultencoding() == "utf-8": + self.assertEqual(actual, str3.encode(errors="strict")) + self.assertEqual(b"", str3.encode("ascii", "ignore")) + if getdefaultencoding() == "ascii": + self.assertEqual(b"", str3.encode(errors="ignore")) + elif getdefaultencoding() == "utf-8": + self.assertEqual(actual, str3.encode(errors="ignore")) + + self.assertTrue(str1.endswith("ing")) + self.assertFalse(str1.endswith("ingh")) + + str4 = _FakeString("\tfoobar") + self.assertEqual("fake string", str1) + self.assertEqual(" foobar", str4.expandtabs()) + self.assertEqual(" foobar", str4.expandtabs(4)) + + self.assertEqual(3, str1.find("e")) + self.assertEqual(-1, str1.find("z")) + self.assertEqual(7, str1.find("r", 7)) + self.assertEqual(-1, str1.find("r", 8)) + self.assertEqual(7, str1.find("r", 5, 9)) + self.assertEqual(-1, str1.find("r", 5, 7)) + + str5 = _FakeString("foo{0}baz") + str6 = _FakeString("foo{abc}baz") + str7 
= _FakeString("foo{0}{abc}buzz") + str8 = _FakeString("{0}{1}") + self.assertEqual("fake string", str1.format()) + self.assertEqual("foobarbaz", str5.format("bar")) + self.assertEqual("foobarbaz", str6.format(abc="bar")) + self.assertEqual("foobarbazbuzz", str7.format("bar", abc="baz")) + self.assertRaises(IndexError, str8.format, "abc") + + if py3k: + self.assertEqual("fake string", str1.format_map({})) + self.assertEqual("foobarbaz", str6.format_map({"abc": "bar"})) + self.assertRaises(ValueError, str5.format_map, {0: "abc"}) + + self.assertEqual(3, str1.index("e")) + self.assertRaises(ValueError, str1.index, "z") + self.assertEqual(7, str1.index("r", 7)) + self.assertRaises(ValueError, str1.index, "r", 8) + self.assertEqual(7, str1.index("r", 5, 9)) + self.assertRaises(ValueError, str1.index, "r", 5, 7) + + str9 = _FakeString("foobar") + str10 = _FakeString("foobar123") + str11 = _FakeString("foo bar") + self.assertTrue(str9.isalnum()) + self.assertTrue(str10.isalnum()) + self.assertFalse(str11.isalnum()) + + self.assertTrue(str9.isalpha()) + self.assertFalse(str10.isalpha()) + self.assertFalse(str11.isalpha()) + + str12 = _FakeString("123") + str13 = _FakeString("\u2155") + str14 = _FakeString("\u00B2") + self.assertFalse(str9.isdecimal()) + self.assertTrue(str12.isdecimal()) + self.assertFalse(str13.isdecimal()) + self.assertFalse(str14.isdecimal()) + + self.assertFalse(str9.isdigit()) + self.assertTrue(str12.isdigit()) + self.assertFalse(str13.isdigit()) + self.assertTrue(str14.isdigit()) + + if py3k: + self.assertTrue(str9.isidentifier()) + self.assertTrue(str10.isidentifier()) + self.assertFalse(str11.isidentifier()) + self.assertFalse(str12.isidentifier()) + + str15 = _FakeString("") + str16 = _FakeString("FooBar") + self.assertTrue(str9.islower()) + self.assertFalse(str15.islower()) + self.assertFalse(str16.islower()) + + self.assertFalse(str9.isnumeric()) + self.assertTrue(str12.isnumeric()) + self.assertTrue(str13.isnumeric()) + self.assertTrue(str14.isnumeric()) + + if py3k: + str16B = _FakeString("\x01\x02") + self.assertTrue(str9.isprintable()) + self.assertTrue(str13.isprintable()) + self.assertTrue(str14.isprintable()) + self.assertTrue(str15.isprintable()) + self.assertFalse(str16B.isprintable()) + + str17 = _FakeString(" ") + str18 = _FakeString("\t \t \r\n") + self.assertFalse(str1.isspace()) + self.assertFalse(str9.isspace()) + self.assertTrue(str17.isspace()) + self.assertTrue(str18.isspace()) + + str19 = _FakeString("This Sentence Looks Like A Title") + str20 = _FakeString("This sentence doesn't LookLikeATitle") + self.assertFalse(str15.istitle()) + self.assertTrue(str19.istitle()) + self.assertFalse(str20.istitle()) + + str21 = _FakeString("FOOBAR") + self.assertFalse(str9.isupper()) + self.assertFalse(str15.isupper()) + self.assertTrue(str21.isupper()) + + self.assertEqual("foobar", str15.join(["foo", "bar"])) + self.assertEqual("foo123bar123baz", str12.join(("foo", "bar", "baz"))) + + self.assertEqual("fake string ", str1.ljust(15)) + self.assertEqual("fake string ", str1.ljust(16)) + self.assertEqual("fake stringqqqq", str1.ljust(15, "q")) + + str22 = _FakeString("ß") + self.assertEqual("", str15.lower()) + self.assertEqual("foobar", str16.lower()) + self.assertEqual("ß", str22.lower()) + if py3k: + self.assertEqual("", str15.casefold()) + self.assertEqual("foobar", str16.casefold()) + self.assertEqual("ss", str22.casefold()) + + str23 = _FakeString(" fake string ") + self.assertEqual("fake string", str1.lstrip()) + self.assertEqual("fake string ", 
str23.lstrip()) + self.assertEqual("ke string", str1.lstrip("abcdef")) + + self.assertEqual(("fa", "ke", " string"), str1.partition("ke")) + self.assertEqual(("fake string", "", ""), str1.partition("asdf")) + + str24 = _FakeString("boo foo moo") + self.assertEqual("real string", str1.replace("fake", "real")) + self.assertEqual("bu fu moo", str24.replace("oo", "u", 2)) + + self.assertEqual(3, str1.rfind("e")) + self.assertEqual(-1, str1.rfind("z")) + self.assertEqual(7, str1.rfind("r", 7)) + self.assertEqual(-1, str1.rfind("r", 8)) + self.assertEqual(7, str1.rfind("r", 5, 9)) + self.assertEqual(-1, str1.rfind("r", 5, 7)) + + self.assertEqual(3, str1.rindex("e")) + self.assertRaises(ValueError, str1.rindex, "z") + self.assertEqual(7, str1.rindex("r", 7)) + self.assertRaises(ValueError, str1.rindex, "r", 8) + self.assertEqual(7, str1.rindex("r", 5, 9)) + self.assertRaises(ValueError, str1.rindex, "r", 5, 7) + + self.assertEqual(" fake string", str1.rjust(15)) + self.assertEqual(" fake string", str1.rjust(16)) + self.assertEqual("qqqqfake string", str1.rjust(15, "q")) + + self.assertEqual(("fa", "ke", " string"), str1.rpartition("ke")) + self.assertEqual(("", "", "fake string"), str1.rpartition("asdf")) + + str25 = _FakeString(" this is a sentence with whitespace ") + actual = ["this", "is", "a", "sentence", "with", "whitespace"] + self.assertEqual(actual, str25.rsplit()) + self.assertEqual(actual, str25.rsplit(None)) + actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with", + "", "whitespace", ""] + self.assertEqual(actual, str25.rsplit(" ")) + actual = [" this is a", "sentence", "with", "whitespace"] + self.assertEqual(actual, str25.rsplit(None, 3)) + actual = [" this is a sentence with", "", "whitespace", ""] + self.assertEqual(actual, str25.rsplit(" ", 3)) + if py3k: + actual = [" this is a", "sentence", "with", "whitespace"] + self.assertEqual(actual, str25.rsplit(maxsplit=3)) + + self.assertEqual("fake string", str1.rstrip()) + self.assertEqual(" fake string", str23.rstrip()) + self.assertEqual("fake stri", str1.rstrip("ngr")) + + actual = ["this", "is", "a", "sentence", "with", "whitespace"] + self.assertEqual(actual, str25.split()) + self.assertEqual(actual, str25.split(None)) + actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with", + "", "whitespace", ""] + self.assertEqual(actual, str25.split(" ")) + actual = ["this", "is", "a", "sentence with whitespace "] + self.assertEqual(actual, str25.split(None, 3)) + actual = ["", "", "", "this is a sentence with whitespace "] + self.assertEqual(actual, str25.split(" ", 3)) + if py3k: + actual = ["this", "is", "a", "sentence with whitespace "] + self.assertEqual(actual, str25.split(maxsplit=3)) + + str26 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere") + self.assertEqual(["lines", "of", "text", "are", "presented", "here"], + str26.splitlines()) + self.assertEqual(["lines\n", "of\n", "text\r\n", "are\r\n", + "presented\n", "here"], str26.splitlines(True)) + + self.assertTrue(str1.startswith("fake")) + self.assertFalse(str1.startswith("faker")) + + self.assertEqual("fake string", str1.strip()) + self.assertEqual("fake string", str23.strip()) + self.assertEqual("ke stri", str1.strip("abcdefngr")) + + self.assertEqual("fOObAR", str16.swapcase()) + + self.assertEqual("Fake String", str1.title()) + + if py3k: + table1 = str.maketrans({97: "1", 101: "2", 105: "3", 111: "4", + 117: "5"}) + table2 = str.maketrans("aeiou", "12345") + table3 = str.maketrans("aeiou", "12345", "rts") + self.assertEqual("f1k2 str3ng", 
str1.translate(table1)) + self.assertEqual("f1k2 str3ng", str1.translate(table2)) + self.assertEqual("f1k2 3ng", str1.translate(table3)) + else: + table = {97: "1", 101: "2", 105: "3", 111: "4", 117: "5"} + self.assertEqual("f1k2 str3ng", str1.translate(table)) + + self.assertEqual("", str15.upper()) + self.assertEqual("FOOBAR", str16.upper()) + + self.assertEqual("123", str12.zfill(3)) + self.assertEqual("000123", str12.zfill(6)) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_template.py b/tests/test_template.py deleted file mode 100644 index b006033..0000000 --- a/tests/test_template.py +++ /dev/null @@ -1,106 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2012 Ben Kurtovic -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
- -from itertools import permutations -import unittest - -from mwparserfromhell.parameter import Parameter -from mwparserfromhell.template import Template - -class TestTemplate(unittest.TestCase): - def setUp(self): - self.name = "foo" - self.bar = Parameter("1", "bar") - self.baz = Parameter("2", "baz") - self.eggs = Parameter("eggs", "spam") - self.params = [self.bar, self.baz, self.eggs] - - def test_construct(self): - Template(self.name) - Template(self.name, self.params) - Template(name=self.name) - Template(name=self.name, params=self.params) - - def test_name(self): - templates = [ - Template(self.name), - Template(self.name, self.params), - Template(name=self.name), - Template(name=self.name, params=self.params) - ] - for template in templates: - self.assertEqual(template.name, self.name) - - def test_params(self): - for template in (Template(self.name), Template(name=self.name)): - self.assertEqual(template.params, []) - for template in (Template(self.name, self.params), - Template(name=self.name, params=self.params)): - self.assertEqual(template.params, self.params) - - def test_getitem(self): - template = Template(name=self.name, params=self.params) - self.assertIs(template[0], self.bar) - self.assertIs(template[1], self.baz) - self.assertIs(template[2], self.eggs) - self.assertIs(template["1"], self.bar) - self.assertIs(template["2"], self.baz) - self.assertIs(template["eggs"], self.eggs) - - def test_render(self): - tests = [ - (Template(self.name), "{{foo}}"), - (Template(self.name, self.params), "{{foo|bar|baz|eggs=spam}}") - ] - for template, rendered in tests: - self.assertEqual(template.render(), rendered) - - def test_repr(self): - correct1= 'Template(name=foo, params={})' - correct2 = 'Template(name=foo, params={"1": "bar", "2": "baz", "eggs": "spam"})' - tests = [(Template(self.name), correct1), - (Template(self.name, self.params), correct2)] - for template, correct in tests: - self.assertEqual(repr(template), correct) - self.assertEqual(str(template), correct) - - def test_cmp(self): - tmp1 = Template(self.name) - tmp2 = Template(name=self.name) - tmp3 = Template(self.name, []) - tmp4 = Template(name=self.name, params=[]) - tmp5 = Template(self.name, self.params) - tmp6 = Template(name=self.name, params=self.params) - - for tmpA, tmpB in permutations((tmp1, tmp2, tmp3, tmp4), 2): - self.assertEqual(tmpA, tmpB) - - for tmpA, tmpB in permutations((tmp5, tmp6), 2): - self.assertEqual(tmpA, tmpB) - - for tmpA in (tmp5, tmp6): - for tmpB in (tmp1, tmp2, tmp3, tmp4): - self.assertNotEqual(tmpA, tmpB) - self.assertNotEqual(tmpB, tmpA) - -if __name__ == "__main__": - unittest.main(verbosity=2) diff --git a/tests/test_tokens.py b/tests/test_tokens.py new file mode 100644 index 0000000..4620982 --- /dev/null +++ b/tests/test_tokens.py @@ -0,0 +1,108 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import py3k +from mwparserfromhell.parser import tokens + +class TestTokens(unittest.TestCase): + """Test cases for the Token class and its subclasses.""" + + def test_issubclass(self): + """check that all classes within the tokens module are really Tokens""" + for name in tokens.__all__: + klass = getattr(tokens, name) + self.assertTrue(issubclass(klass, tokens.Token)) + self.assertIsInstance(klass(), klass) + self.assertIsInstance(klass(), tokens.Token) + + def test_attributes(self): + """check that Token attributes can be managed properly""" + token1 = tokens.Token() + token2 = tokens.Token(foo="bar", baz=123) + + self.assertEqual("bar", token2.foo) + self.assertEqual(123, token2.baz) + self.assertRaises(KeyError, lambda: token1.foo) + self.assertRaises(KeyError, lambda: token2.bar) + + token1.spam = "eggs" + token2.foo = "ham" + del token2.baz + + self.assertEqual("eggs", token1.spam) + self.assertEqual("ham", token2.foo) + self.assertRaises(KeyError, lambda: token2.baz) + self.assertRaises(KeyError, delattr, token2, "baz") + + def test_repr(self): + """check that repr() on a Token works as expected""" + token1 = tokens.Token() + token2 = tokens.Token(foo="bar", baz=123) + token3 = tokens.Text(text="earwig" * 100) + hundredchars = ("earwig" * 100)[:97] + "..." 
+ + self.assertEqual("Token()", repr(token1)) + if py3k: + token2repr1 = "Token(foo='bar', baz=123)" + token2repr2 = "Token(baz=123, foo='bar')" + token3repr = "Text(text='" + hundredchars + "')" + else: + token2repr1 = "Token(foo=u'bar', baz=123)" + token2repr2 = "Token(baz=123, foo=u'bar')" + token3repr = "Text(text=u'" + hundredchars + "')" + token2repr = repr(token2) + self.assertTrue(token2repr == token2repr1 or token2repr == token2repr2) + self.assertEqual(token3repr, repr(token3)) + + def test_equality(self): + """check that equivalent tokens are considered equal""" + token1 = tokens.Token() + token2 = tokens.Token() + token3 = tokens.Token(foo="bar", baz=123) + token4 = tokens.Text(text="asdf") + token5 = tokens.Text(text="asdf") + token6 = tokens.TemplateOpen(text="asdf") + + self.assertEqual(token1, token2) + self.assertEqual(token2, token1) + self.assertEqual(token4, token5) + self.assertEqual(token5, token4) + self.assertNotEqual(token1, token3) + self.assertNotEqual(token2, token3) + self.assertNotEqual(token4, token6) + self.assertNotEqual(token5, token6) + + def test_repr_equality(self): + "check that eval(repr(token)) == token" + tests = [ + tokens.Token(), + tokens.Token(foo="bar", baz=123), + tokens.Text(text="earwig") + ] + for token in tests: + self.assertEqual(token, eval(repr(token), vars(tokens))) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..c088530 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,67 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.nodes import Template, Text +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.utils import parse_anything +from mwparserfromhell.wikicode import Wikicode + +from ._test_tree_equality import TreeEqualityTestCase + +class TestUtils(TreeEqualityTestCase): + """Tests for the utils module, which provides parse_anything().""" + + def test_parse_anything_valid(self): + """tests for valid input to utils.parse_anything()""" + wrap = lambda L: Wikicode(SmartList(L)) + textify = lambda L: wrap([Text(item) for item in L]) + tests = [ + (wrap([Text("foobar")]), textify(["foobar"])), + (Template(wrap([Text("spam")])), + wrap([Template(textify(["spam"]))])), + ("fóóbar", textify(["fóóbar"])), + (b"foob\xc3\xa1r", textify(["foobár"])), + (123, textify(["123"])), + (True, textify(["True"])), + (None, wrap([])), + ([Text("foo"), Text("bar"), Text("baz")], + textify(["foo", "bar", "baz"])), + ([wrap([Text("foo")]), Text("bar"), "baz", 123, 456], + textify(["foo", "bar", "baz", "123", "456"])), + ([[[([[((("foo",),),)], "bar"],)]]], textify(["foo", "bar"])) + ] + for test, valid in tests: + self.assertWikicodeEqual(valid, parse_anything(test)) + + def test_parse_anything_invalid(self): + """tests for invalid input to utils.parse_anything()""" + self.assertRaises(ValueError, parse_anything, Ellipsis) + self.assertRaises(ValueError, parse_anything, object) + self.assertRaises(ValueError, parse_anything, object()) + self.assertRaises(ValueError, parse_anything, type) + self.assertRaises(ValueError, parse_anything, ["foo", [object]]) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/tokenizer/templates.mwtest b/tests/tokenizer/templates.mwtest new file mode 100644 index 0000000..fa3c0a4 --- /dev/null +++ b/tests/tokenizer/templates.mwtest @@ -0,0 +1,599 @@ +name: no_params +label: simplest type of template +input: "{{template}}" +output: [TemplateOpen(), Text(text="template"), TemplateClose()] + +--- + +name: one_param_unnamed +label: basic template with one unnamed parameter +input: "{{foo|bar}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateClose()] + +--- + +name: one_param_named +label: basic template with one named parameter +input: "{{foo|bar=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] + +--- + +name: multiple_unnamed_params +label: basic template with multiple unnamed parameters +input: "{{foo|bar|baz|biz|buzz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamSeparator(), Text(text="buzz"), TemplateClose()] + +--- + +name: multiple_named_params +label: basic template with multiple named parameters +input: "{{foo|bar=baz|biz=buzz|buff=baff|usr=bin}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamEquals(), Text(text="buzz"), TemplateParamSeparator(), Text(text="buff"), TemplateParamEquals(), Text(text="baff"), TemplateParamSeparator(), Text(text="usr"), TemplateParamEquals(), Text(text="bin"), TemplateClose()] + +--- + +name: multiple_mixed_params +label: basic template with multiple unnamed/named parameters +input: 
"{{foo|bar=baz|biz|buzz=buff|usr|bin}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamSeparator(), Text(text="buzz"), TemplateParamEquals(), Text(text="buff"), TemplateParamSeparator(), Text(text="usr"), TemplateParamSeparator(), Text(text="bin"), TemplateClose()] + +--- + +name: multiple_mixed_params2 +label: basic template with multiple unnamed/named parameters in another order +input: "{{foo|bar|baz|biz=buzz|buff=baff|usr=bin}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamEquals(), Text(text="buzz"), TemplateParamSeparator(), Text(text="buff"), TemplateParamEquals(), Text(text="baff"), TemplateParamSeparator(), Text(text="usr"), TemplateParamEquals(), Text(text="bin"), TemplateClose()] + +--- + +name: nested_unnamed_param +label: nested template as an unnamed parameter +input: "{{foo|{{bar}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateClose()] + +--- + +name: nested_named_param_value +label: nested template as a parameter value with a named parameter +input: "{{foo|bar={{baz}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_named_param_name_and_value +label: nested templates as a parameter name and value +input: "{{foo|{{bar}}={{baz}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_start +label: nested template at the beginning of a template name +input: "{{{{foo}}bar}}" +output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose()] + +--- + +name: nested_name_start_unnamed_param +label: nested template at the beginning of a template name and as an unnamed parameter +input: "{{{{foo}}bar|{{baz}}}}" +output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_start_named_param_value +label: nested template at the beginning of a template name and as a parameter value with a named parameter +input: "{{{{foo}}bar|baz={{biz}}}}" +output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_start_named_param_name_and_value +label: nested template at the beginning of a template name and as a parameter name and value +input: "{{{{foo}}bar|{{baz}}={{biz}}}}" +output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_end +label: nested template at the end of a template name +input: "{{foo{{bar}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), 
Text(text="bar"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_end_unnamed_param +label: nested template at the end of a template name and as an unnamed parameter +input: "{{foo{{bar}}|{{baz}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_end_named_param_value +label: nested template at the end of a template name and as a parameter value with a named parameter +input: "{{foo{{bar}}|baz={{biz}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_end_named_param_name_and_value +label: nested template at the end of a template name and as a parameter name and value +input: "{{foo{{bar}}|{{baz}}={{biz}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_mid +label: nested template in the middle of a template name +input: "{{foo{{bar}}baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose()] + +--- + +name: nested_name_mid_unnamed_param +label: nested template in the middle of a template name and as an unnamed parameter +input: "{{foo{{bar}}baz|{{biz}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateParamSeparator(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_mid_named_param_value +label: nested template in the middle of a template name and as a parameter value with a named parameter +input: "{{foo{{bar}}baz|biz={{buzz}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamEquals(), TemplateOpen(), Text(text="buzz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_mid_named_param_name_and_value +label: nested template in the middle of a template name and as a parameter name and value +input: "{{foo{{bar}}baz|{{biz}}={{buzz}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateParamSeparator(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="buzz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_start_end +label: nested template at the beginning and end of a template name +input: "{{{{foo}}{{bar}}}}" +output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_start_end_unnamed_param +label: nested template at the beginning and end of a template name and as an unnamed parameter +input: "{{{{foo}}{{bar}}|{{baz}}}}" +output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] + +--- + +name: 
nested_name_start_end_named_param_value +label: nested template at the beginning and end of a template name and as a parameter value with a named parameter +input: "{{{{foo}}{{bar}}|baz={{biz}}}}" +output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_start_end_named_param_name_and_value +label: nested template at the beginning and end of a template name and as a parameter name and value +input: "{{{{foo}}{{bar}}|{{baz}}={{biz}}}}" +output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_names_multiple +label: multiple nested templates within nested templates +input: "{{{{{{{{foo}}bar}}baz}}biz}}" +output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose(), Text(text="biz"), TemplateClose()] + +--- + +name: nested_names_multiple_unnamed_param +label: multiple nested templates within nested templates with a nested unnamed parameter +input: "{{{{{{{{foo}}bar}}baz}}biz|{{buzz}}}}" +output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose(), Text(text="biz"), TemplateParamSeparator(), TemplateOpen(), Text(text="buzz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_names_multiple_named_param_value +label: multiple nested templates within nested templates with a nested parameter value in a named parameter +input: "{{{{{{{{foo}}bar}}baz}}biz|buzz={{bin}}}}" +output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose(), Text(text="biz"), TemplateParamSeparator(), Text(text="buzz"), TemplateParamEquals(), TemplateOpen(), Text(text="bin"), TemplateClose(), TemplateClose()] + +--- + +name: nested_names_multiple_named_param_name_and_value +label: multiple nested templates within nested templates with a nested parameter name and value +input: "{{{{{{{{foo}}bar}}baz}}biz|{{buzz}}={{bin}}}}" +output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose(), Text(text="biz"), TemplateParamSeparator(), TemplateOpen(), Text(text="buzz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="bin"), TemplateClose(), TemplateClose()] + +--- + +name: mixed_nested_templates +label: mixed assortment of nested templates within template names, parameter names, and values +input: "{{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}}" +output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), Text(text="biz"), TemplateClose(), Text(text="buzz"), TemplateClose(), Text(text="usr"), TemplateParamSeparator(), TemplateOpen(), Text(text="bin"), TemplateClose(), TemplateClose()] + +--- + +name: newlines_start +label: a newline at 
the start of a template name +input: "{{\nfoobar}}" +output: [TemplateOpen(), Text(text="\nfoobar"), TemplateClose()] + +--- + +name: newlines_end +label: a newline at the end of a template name +input: "{{foobar\n}}" +output: [TemplateOpen(), Text(text="foobar\n"), TemplateClose()] + +--- + +name: newlines_start_end +label: a newline at the start and end of a template name +input: "{{\nfoobar\n}}" +output: [TemplateOpen(), Text(text="\nfoobar\n"), TemplateClose()] + +--- + +name: newlines_mid +label: a newline at the middle of a template name +input: "{{foo\nbar}}" +output: [Text(text="{{foo\nbar}}")] + +--- + +name: newlines_start_mid +label: a newline at the start and middle of a template name +input: "{{\nfoo\nbar}}" +output: [Text(text="{{\nfoo\nbar}}")] + +--- + +name: newlines_mid_end +label: a newline at the middle and end of a template name +input: "{{foo\nbar\n}}" +output: [Text(text="{{foo\nbar\n}}")] + +--- + +name: newlines_start_mid_end +label: a newline at the start, middle, and end of a template name +input: "{{\nfoo\nbar\n}}" +output: [Text(text="{{\nfoo\nbar\n}}")] + +--- + +name: newlines_unnamed_param +label: newlines within an unnamed template parameter +input: "{{foo|\nb\nar\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateClose()] + +--- + +name: newlines_enclose_template_name_unnamed_param +label: newlines enclosing a template name and within an unnamed template parameter +input: "{{\nfoo\n|\nb\nar\n}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateClose()] + +--- + +name: newlines_within_template_name_unnamed_param +label: newlines within a template name and within an unnamed template parameter +input: "{{\nfo\no\n|\nb\nar\n}}" +output: [Text(text="{{\nfo\no\n|\nb\nar\n}}")] + +--- + +name: newlines_enclose_template_name_named_param_value +label: newlines enclosing a template name and within a named parameter value +input: "{{\nfoo\n|1=\nb\nar\n}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="\nb\nar\n"), TemplateClose()] + +--- + +name: newlines_within_template_name_named_param_value +label: newlines within a template name and within a named parameter value +input: "{{\nf\noo\n|1=\nb\nar\n}}" +output: [Text(text="{{\nf\noo\n|1=\nb\nar\n}}")] + +--- + +name: newlines_named_param_name +label: newlines within a parameter name +input: "{{foo|\nb\nar\n=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] + +--- + +name: newlines_named_param_name_param_value +label: newlines within a parameter name and within a parameter value +input: "{{foo|\nb\nar\n=\nba\nz\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="\nba\nz\n"), TemplateClose()] + +--- + +name: newlines_enclose_template_name_named_param_name +label: newlines enclosing a template name and within a parameter name +input: "{{\nfoo\n|\nb\nar\n=baz}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] + +--- + +name: newlines_enclose_template_name_named_param_name_param_value +label: newlines enclosing a template name and within a parameter name and within a parameter value +input: "{{\nfoo\n|\nb\nar\n=\nba\nz\n}}" +output: 
[TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="\nba\nz\n"), TemplateClose()] + +--- + +name: newlines_within_template_name_named_param_name +label: newlines within a template name and within a parameter name +input: "{{\nfo\no\n|\nb\nar\n=baz}}" +output: [Text(text="{{\nfo\no\n|\nb\nar\n=baz}}")] + +--- + +name: newlines_within_template_name_named_param_name_param_value +label: newlines within a template name and within a parameter name and within a parameter value +input: "{{\nf\noo\n|\nb\nar\n=\nba\nz\n}}" +output: [Text(text="{{\nf\noo\n|\nb\nar\n=\nba\nz\n}}")] + +--- + +name: newlines_wildcard +label: a random, complex assortment of templates and newlines +input: "{{\nfoo\n|\nb\nar\n=\nb\naz\n|\nb\nuz\n}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="\nb\naz\n"), TemplateParamSeparator(), Text(text="\nb\nuz\n"), TemplateClose()] + +--- + +name: newlines_wildcard_redux +label: an even more random and complex assortment of templates and newlines +input: "{{\nfoo\n|\n{{\nbar\n|\nb\naz\n=\nb\niz\n}}\n=\nb\nuzz\n}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\n"), TemplateOpen(), Text(text="\nbar\n"), TemplateParamSeparator(), Text(text="\nb\naz\n"), TemplateParamEquals(), Text(text="\nb\niz\n"), TemplateClose(), Text(text="\n"), TemplateParamEquals(), Text(text="\nb\nuzz\n"), TemplateClose()] + +--- + +name: newlines_wildcard_redux_invalid +label: a variation of the newlines_wildcard_redux test that is invalid +input: "{{\nfoo\n|\n{{\nb\nar\n|\nb\naz\n=\nb\niz\n}}\n=\nb\nuzz\n}}" +output: [Text(text="{{\nfoo\n|\n{{\nb\nar\n|\nb\naz\n=\nb\niz\n}}\n=\nb\nuzz\n}}")] + +--- + +name: invalid_name_left_brace_middle +label: invalid characters in template name: left brace in middle +input: "{{foo{bar}}" +output: [Text(text="{{foo{bar}}")] + +--- + +name: invalid_name_right_brace_middle +label: invalid characters in template name: right brace in middle +input: "{{foo}bar}}" +output: [Text(text="{{foo}bar}}")] + +--- + +name: invalid_name_left_braces +label: invalid characters in template name: two left braces in middle +input: "{{foo{b{ar}}" +output: [Text(text="{{foo{b{ar}}")] + +--- + +name: invalid_name_left_bracket_middle +label: invalid characters in template name: left bracket in middle +input: "{{foo[bar}}" +output: [Text(text="{{foo[bar}}")] + +--- + +name: invalid_name_right_bracket_middle +label: invalid characters in template name: right bracket in middle +input: "{{foo]bar}}" +output: [Text(text="{{foo]bar}}")] + +--- + +name: invalid_name_left_bracket_start +label: invalid characters in template name: left bracket at start +input: "{{[foobar}}" +output: [Text(text="{{[foobar}}")] + +--- + +name: invalid_name_right_bracket_start +label: invalid characters in template name: right bracket at end +input: "{{foobar]}}" +output: [Text(text="{{foobar]}}")] + +--- + +name: valid_name_left_brace_start +label: valid characters in template name: left brace at start +input: "{{{foobar}}" +output: [Text(text="{"), TemplateOpen(), Text(text="foobar"), TemplateClose()] + +--- + +name: valid_unnamed_param_left_brace +label: valid characters in unnamed template parameter: left brace +input: "{{foo|ba{r}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba{r"), TemplateClose()] + +--- + +name: valid_unnamed_param_braces +label: valid characters in unnamed template 
parameter: left and right braces +input: "{{foo|ba{r}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba{r"), TemplateClose(), Text(text="}")] + +--- + +name: valid_param_name_braces +label: valid characters in template parameter name: left and right braces +input: "{{foo|ba{r}=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba{r}"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] + +--- + +name: valid_param_name_brackets +label: valid characters in template parameter name: left and right brackets +input: "{{foo|ba[r]=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba[r]"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] + +--- + +name: valid_param_name_double_left_brackets +label: valid characters in template parameter name: double left brackets +input: "{{foo|bar[[in\nvalid=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar[[in\nvalid"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] + +--- + +name: valid_param_name_double_right_brackets +label: valid characters in template parameter name: double right brackets +input: "{{foo|bar]]=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar]]"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] + +--- + +name: valid_param_name_double_brackets +label: valid characters in template parameter name: double left and right brackets +input: "{{foo|bar[[in\nvalid]]=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar[[in\nvalid]]"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] + +--- + +name: invalid_param_name_double_left_braces +label: invalid characters in template parameter name: double left braces +input: "{{foo|bar{{in\nvalid=baz}}" +output: [Text(text="{{foo|bar{{in\nvalid=baz}}")] + +--- + +name: invalid_param_name_double_braces +label: invalid characters in template parameter name: double left and right braces +input: "{{foo|bar{{in\nvalid}}=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar{{in\nvalid"), TemplateClose(), Text(text="=baz}}")] + +--- + +name: incomplete_plain +label: incomplete templates that should fail gracefully: no close whatsoever +input: "{{stuff}} {{foobar" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foobar")] + +--- + +name: incomplete_right_brace +label: incomplete templates that should fail gracefully: only one right brace +input: "{{stuff}} {{foobar}" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foobar}")] + +--- + +name: incomplete_pipe +label: incomplete templates that should fail gracefully: a pipe +input: "{{stuff}} {{foobar|" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foobar|")] + +--- + +name: incomplete_unnamed_param +label: incomplete templates that should fail gracefully: an unnamed parameter +input: "{{stuff}} {{foo|bar" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar")] + +--- + +name: incomplete_unnamed_param_pipe +label: incomplete templates that should fail gracefully: an unnamed parameter, then a pipe +input: "{{stuff}} {{foo|bar|" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar|")] + +--- + +name: incomplete_valueless_param +label: incomplete templates that should fail gracefully: a named 
parameter with no value +input: "{{stuff}} {{foo|bar=" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=")] + +--- + +name: incomplete_valueless_param_pipe +label: incomplete templates that should fail gracefully: a named parameter with no value, then a pipe +input: "{{stuff}} {{foo|bar=|" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=|")] + +--- + +name: incomplete_named_param +label: incomplete templates that should fail gracefully: a named parameter with a value +input: "{{stuff}} {{foo|bar=baz" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz")] + +--- + +name: incomplete_named_param_pipe +label: incomplete templates that should fail gracefully: a named parameter with a value, then a pipe +input: "{{stuff}} {{foo|bar=baz|" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz|")] + +--- + +name: incomplete_two_unnamed_params +label: incomplete templates that should fail gracefully: two unnamed parameters +input: "{{stuff}} {{foo|bar|baz" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar|baz")] + +--- + +name: incomplete_unnamed_param_valueless_param +label: incomplete templates that should fail gracefully: an unnamed parameter, then a named parameter with no value +input: "{{stuff}} {{foo|bar|baz=" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar|baz=")] + +--- + +name: incomplete_unnamed_param_named_param +label: incomplete templates that should fail gracefully: an unnamed parameter, then a named parameter with a value +input: "{{stuff}} {{foo|bar|baz=biz" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar|baz=biz")] + +--- + +name: incomplete_named_param_unnamed_param +label: incomplete templates that should fail gracefully: a named parameter with a value, then an unnamed parameter +input: "{{stuff}} {{foo|bar=baz|biz" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz|biz")] + +--- + +name: incomplete_named_param_valueless_param +label: incomplete templates that should fail gracefully: a named parameter with a value, then a named parameter with no value +input: "{{stuff}} {{foo|bar=baz|biz=" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz|biz=")] + +--- + +name: incomplete_two_named_params +label: incomplete templates that should fail gracefully: two named parameters with values +input: "{{stuff}} {{foo|bar=baz|biz=buzz" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz|biz=buzz")] + +--- + +name: incomplete_nested_template_as_unnamed_param +label: incomplete templates that should fail gracefully: a valid nested template as an unnamed parameter +input: "{{stuff}} {{foo|{{bar}}" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|"), TemplateOpen(), Text(text="bar"), TemplateClose()] + +--- + +name: incomplete_nested_template_as_param_value +label: incomplete templates that should fail gracefully: a valid nested template as a parameter value +input: "{{stuff}} {{foo|bar={{baz}}" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar="), TemplateOpen(), Text(text="baz"), TemplateClose()] diff --git a/tests/tokenizer/text.mwtest b/tests/tokenizer/text.mwtest new file mode 100644 index 0000000..77d5f50 --- /dev/null +++ b/tests/tokenizer/text.mwtest @@ -0,0 
+1,25 @@ +name: basic +label: sanity check for basic text parsing, no gimmicks +input: "foobar" +output: [Text(text="foobar")] + +--- + +name: newlines +label: slightly more complex text parsing, with newlines +input: "This is a line of text.\nThis is another line of text.\nThis is another." +output: [Text(text="This is a line of text.\nThis is another line of text.\nThis is another.")] + +--- + +name: unicode +label: ensure unicode data is handled properly +input: "Thís ís å sëñtënce with diœcritiçs." +output: [Text(text="Thís ís å sëñtënce with diœcritiçs.")] + +--- + +name: unicode2 +label: additional unicode check for non-BMP codepoints +input: "𐌲𐌿𐍄𐌰𐍂𐌰𐌶𐌳𐌰" +output: [Text(text="𐌲𐌿𐍄𐌰𐍂𐌰𐌶𐌳𐌰")]
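
Note on the .mwtest format introduced above: each case is a block of name:, label:, input:, and output: fields, with cases separated by a line containing only ---. The input field holds a quoted, escaped string of wikicode, and the output field holds the token sequence the tokenizer is expected to emit. As a rough sketch of how such a file could be consumed (this is not the project's actual test harness; the load_cases name, the io/json handling, and the error handling are assumptions), the blocks can be split apart and the output field evaluated in the namespace of the tokens module, mirroring the eval(repr(token), vars(tokens)) trick used by test_repr_equality above::

    # Hypothetical loader for the .mwtest files above; the names and details
    # here are assumptions for illustration, not the project's real harness.
    from __future__ import unicode_literals

    import io
    import json

    from mwparserfromhell.parser import tokens

    def load_cases(path):
        """Yield (name, label, input text, expected tokens) for each case."""
        with io.open(path, encoding="utf-8") as fp:
            raw = fp.read()
        for block in raw.split("\n---\n"):
            fields = {}
            for line in block.splitlines():
                key, sep, value = line.partition(":")
                if sep:
                    fields[key.strip()] = value.strip()
            if not fields:
                continue  # ignore stray blank blocks
            text = json.loads(fields["input"])  # unescape \n, \", unicode
            expected = eval(fields["output"], vars(tokens))  # build Token objects
            yield fields["name"], fields["label"], text, expected

A harness along these lines could then feed each input text through the tokenizer and assert that the emitted tokens equal expected, generating one test method per case.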