Browse Source

Merge branch 'feature/tests' into develop (#7)

tags/v0.2
Ben Kurtovic 11 years ago
parent
commit
81592b3bd7
32 changed files with 2968 additions and 502 deletions
  1. +1
    -0
      .gitignore
  2. +5
    -2
      README.rst
  3. +29
    -29
      mwparserfromhell/compat.py
  4. +1
    -0
      mwparserfromhell/nodes/argument.py
  5. +1
    -0
      mwparserfromhell/nodes/comment.py
  6. +4
    -1
      mwparserfromhell/nodes/html_entity.py
  7. +1
    -0
      mwparserfromhell/nodes/text.py
  8. +1
    -0
      mwparserfromhell/nodes/wikilink.py
  9. +39
    -23
      mwparserfromhell/parser/contexts.py
  10. +61
    -81
      mwparserfromhell/parser/tokenizer.c
  11. +7
    -3
      mwparserfromhell/parser/tokenizer.h
  12. +68
    -33
      mwparserfromhell/parser/tokenizer.py
  13. +133
    -44
      mwparserfromhell/smart_list.py
  14. +82
    -25
      mwparserfromhell/string_mixin.py
  15. +2
    -1
      setup.py
  16. +130
    -0
      tests/MWPFHTestCase.tmlanguage
  17. +121
    -0
      tests/_test_tokenizer.py
  18. +113
    -0
      tests/_test_tree_equality.py
  19. +20
    -0
      tests/compat.py
  20. +261
    -0
      tests/test_builder.py
  21. +47
    -0
      tests/test_ctokenizer.py
  22. +131
    -0
      tests/test_docs.py
  23. +0
    -119
      tests/test_parameter.py
  24. +41
    -35
      tests/test_parser.py
  25. +43
    -0
      tests/test_pytokenizer.py
  26. +392
    -0
      tests/test_smart_list.py
  27. +435
    -0
      tests/test_string_mixin.py
  28. +0
    -106
      tests/test_template.py
  29. +108
    -0
      tests/test_tokens.py
  30. +67
    -0
      tests/test_utils.py
  31. +599
    -0
      tests/tokenizer/templates.mwtest
  32. +25
    -0
      tests/tokenizer/text.mwtest

+ 1
- 0
.gitignore View File

@@ -1,4 +1,5 @@
*.pyc
*.so
*.egg
*.egg-info
.DS_Store


+ 5
- 2
README.rst View File

@@ -18,7 +18,8 @@ so you can install the latest release with ``pip install mwparserfromhell``
cd mwparserfromhell
python setup.py install

You can run the comprehensive unit testing suite with ``python setup.py test``.
You can run the comprehensive unit testing suite with
``python setup.py test -q``.

Usage
-----
@@ -124,7 +125,9 @@ following code (via the API_)::
import mwparserfromhell
API_URL = "http://en.wikipedia.org/w/api.php"
def parse(title):
raw = urllib.urlopen(API_URL, data).read()
data = {"action": "query", "prop": "revisions", "rvlimit": 1,
"rvprop": "content", "format": "json", "titles": title}
raw = urllib.urlopen(API_URL, urllib.urlencode(data)).read()
res = json.loads(raw)
text = res["query"]["pages"].values()[0]["revisions"][0]["*"]
return mwparserfromhell.parse(text)


+ 29
- 29
mwparserfromhell/compat.py View File

@@ -1,29 +1,29 @@
# -*- coding: utf-8 -*-
"""
Implements support for both Python 2 and Python 3 by defining common types in
terms of their Python 2/3 variants. For example, :py:class:`str` is set to
:py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise,
:py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These
types are meant to be imported directly from within the parser's modules.
"""
import sys
py3k = sys.version_info[0] == 3
if py3k:
bytes = bytes
str = str
basestring = str
maxsize = sys.maxsize
import html.entities as htmlentities
else:
bytes = str
str = unicode
basestring = basestring
maxsize = sys.maxint
import htmlentitydefs as htmlentities
del sys
# -*- coding: utf-8 -*-
"""
Implements support for both Python 2 and Python 3 by defining common types in
terms of their Python 2/3 variants. For example, :py:class:`str` is set to
:py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise,
:py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These
types are meant to be imported directly from within the parser's modules.
"""
import sys
py3k = sys.version_info[0] == 3
if py3k:
bytes = bytes
str = str
basestring = str
maxsize = sys.maxsize
import html.entities as htmlentities
else:
bytes = str
str = unicode
basestring = basestring
maxsize = sys.maxint
import htmlentitydefs as htmlentities
del sys

+ 1
- 0
mwparserfromhell/nodes/argument.py View File

@@ -30,6 +30,7 @@ __all__ = ["Argument"]

class Argument(Node):
"""Represents a template argument substitution, like ``{{{foo}}}``."""

def __init__(self, name, default=None):
super(Argument, self).__init__()
self._name = name


+ 1
- 0
mwparserfromhell/nodes/comment.py View File

@@ -29,6 +29,7 @@ __all__ = ["Comment"]

class Comment(Node):
"""Represents a hidden HTML comment, like ``<!-- foobar -->``."""

def __init__(self, contents):
super(Comment, self).__init__()
self._contents = contents


+ 4
- 1
mwparserfromhell/nodes/html_entity.py View File

@@ -135,7 +135,10 @@ class HTMLEntity(Node):

@hex_char.setter
def hex_char(self, newval):
self._hex_char = bool(newval)
newval = str(newval)
if newval not in ("x", "X"):
raise ValueError(newval)
self._hex_char = newval

def normalize(self):
"""Return the unicode character represented by the HTML entity."""


+ 1
- 0
mwparserfromhell/nodes/text.py View File

@@ -29,6 +29,7 @@ __all__ = ["Text"]

class Text(Node):
"""Represents ordinary, unformatted text with no special properties."""

def __init__(self, value):
super(Text, self).__init__()
self._value = value


+ 1
- 0
mwparserfromhell/nodes/wikilink.py View File

@@ -30,6 +30,7 @@ __all__ = ["Wikilink"]

class Wikilink(Node):
"""Represents an internal wikilink, like ``[[Foo|Bar]]``."""

def __init__(self, title, text=None):
super(Wikilink, self).__init__()
self._title = title


+ 39
- 23
mwparserfromhell/parser/contexts.py View File

@@ -62,6 +62,15 @@ Local (stack-specific) contexts:

* :py:const:`COMMENT`

* :py:const:`SAFETY_CHECK`

* :py:const:`HAS_TEXT`
* :py:const:`FAIL_ON_TEXT`
* :py:const:`FAIL_NEXT`
* :py:const:`FAIL_ON_LBRACE`
* :py:const:`FAIL_ON_RBRACE`
* :py:const:`FAIL_ON_EQUALS`

Global contexts:

* :py:const:`GL_HEADING`
@@ -69,29 +78,36 @@ Global contexts:

# Local contexts:

TEMPLATE = 0b00000000000111
TEMPLATE_NAME = 0b00000000000001
TEMPLATE_PARAM_KEY = 0b00000000000010
TEMPLATE_PARAM_VALUE = 0b00000000000100

ARGUMENT = 0b00000000011000
ARGUMENT_NAME = 0b00000000001000
ARGUMENT_DEFAULT = 0b00000000010000

WIKILINK = 0b00000001100000
WIKILINK_TITLE = 0b00000000100000
WIKILINK_TEXT = 0b00000001000000

HEADING = 0b01111110000000
HEADING_LEVEL_1 = 0b00000010000000
HEADING_LEVEL_2 = 0b00000100000000
HEADING_LEVEL_3 = 0b00001000000000
HEADING_LEVEL_4 = 0b00010000000000
HEADING_LEVEL_5 = 0b00100000000000
HEADING_LEVEL_6 = 0b01000000000000

COMMENT = 0b10000000000000

TEMPLATE = 0b00000000000000000111
TEMPLATE_NAME = 0b00000000000000000001
TEMPLATE_PARAM_KEY = 0b00000000000000000010
TEMPLATE_PARAM_VALUE = 0b00000000000000000100

ARGUMENT = 0b00000000000000011000
ARGUMENT_NAME = 0b00000000000000001000
ARGUMENT_DEFAULT = 0b00000000000000010000

WIKILINK = 0b00000000000001100000
WIKILINK_TITLE = 0b00000000000000100000
WIKILINK_TEXT = 0b00000000000001000000

HEADING = 0b00000001111110000000
HEADING_LEVEL_1 = 0b00000000000010000000
HEADING_LEVEL_2 = 0b00000000000100000000
HEADING_LEVEL_3 = 0b00000000001000000000
HEADING_LEVEL_4 = 0b00000000010000000000
HEADING_LEVEL_5 = 0b00000000100000000000
HEADING_LEVEL_6 = 0b00000001000000000000

COMMENT = 0b00000010000000000000

SAFETY_CHECK = 0b11111100000000000000
HAS_TEXT = 0b00000100000000000000
FAIL_ON_TEXT = 0b00001000000000000000
FAIL_NEXT = 0b00010000000000000000
FAIL_ON_LBRACE = 0b00100000000000000000
FAIL_ON_RBRACE = 0b01000000000000000000
FAIL_ON_EQUALS = 0b10000000000000000000

# Global contexts:



+ 61
- 81
mwparserfromhell/parser/tokenizer.c View File

@@ -1,6 +1,6 @@
/*
Tokenizer for MWParserFromHell
Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
@@ -843,7 +843,8 @@ Tokenizer_handle_heading_end(Tokenizer* self)
self->head++;
}
current = log2(self->topstack->context / LC_HEADING_LEVEL_1) + 1;
level = current > best ? (best > 6 ? 6 : best) : (current > 6 ? 6 : current);
level = current > best ? (best > 6 ? 6 : best) :
(current > 6 ? 6 : current);
after = (HeadingData*) Tokenizer_parse(self, self->topstack->context);
if (BAD_ROUTE) {
RESET_ROUTE();
@@ -956,11 +957,11 @@ Tokenizer_really_parse_entity(Tokenizer* self)
else
numeric = hexadecimal = 0;
if (hexadecimal)
valid = "0123456789abcdefABCDEF";
valid = HEXDIGITS;
else if (numeric)
valid = "0123456789";
valid = DIGITS;
else
valid = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
valid = ALPHANUM;
text = calloc(MAX_ENTITY_SIZE, sizeof(char));
if (!text) {
PyErr_NoMemory();
@@ -1005,7 +1006,7 @@ Tokenizer_really_parse_entity(Tokenizer* self)
i = 0;
while (1) {
def = entitydefs[i];
if (!def) // We've reached the end of the def list without finding it
if (!def) // We've reached the end of the defs without finding it
FAIL_ROUTE_AND_EXIT()
if (strcmp(text, def) == 0)
break;
@@ -1135,48 +1136,59 @@ Tokenizer_parse_comment(Tokenizer* self)
}

/*
Make sure we are not trying to write an invalid character.
Make sure we are not trying to write an invalid character. Return 0 if
everything is safe, or -1 if the route must be failed.
*/
static void
static int
Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data)
{
if (context & LC_FAIL_NEXT) {
Tokenizer_fail_route(self);
return;
return -1;
}
if (context & LC_WIKILINK_TITLE) {
if (data == *"]" || data == *"{")
self->topstack->context |= LC_FAIL_NEXT;
else if (data == *"\n" || data == *"[" || data == *"}")
Tokenizer_fail_route(self);
return;
return -1;
return 0;
}
if (context & LC_TEMPLATE_NAME) {
if (data == *"{" || data == *"}" || data == *"[") {
self->topstack->context |= LC_FAIL_NEXT;
return;
return 0;
}
if (data == *"]") {
Tokenizer_fail_route(self);
return;
return -1;
}
if (data == *"|")
return;
return 0;
if (context & LC_HAS_TEXT) {
if (context & LC_FAIL_ON_TEXT) {
if (!Py_UNICODE_ISSPACE(data))
return -1;
}
else {
if (data == *"\n")
self->topstack->context |= LC_FAIL_ON_TEXT;
}
}
else if (!Py_UNICODE_ISSPACE(data))
self->topstack->context |= LC_HAS_TEXT;
}
else if (context & (LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME)) {
else {
if (context & LC_FAIL_ON_EQUALS) {
if (data == *"=") {
Tokenizer_fail_route(self);
return;
return -1;
}
}
else if (context & LC_FAIL_ON_LBRACE) {
if (data == *"{") {
if (data == *"{" || (Tokenizer_READ(self, -1) == *"{" &&
Tokenizer_READ(self, -2) == *"{")) {
if (context & LC_TEMPLATE)
self->topstack->context |= LC_FAIL_ON_EQUALS;
else
self->topstack->context |= LC_FAIL_NEXT;
return;
return 0;
}
self->topstack->context ^= LC_FAIL_ON_LBRACE;
}
@@ -1186,7 +1198,7 @@ Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data)
self->topstack->context |= LC_FAIL_ON_EQUALS;
else
self->topstack->context |= LC_FAIL_NEXT;
return;
return 0;
}
self->topstack->context ^= LC_FAIL_ON_RBRACE;
}
@@ -1195,47 +1207,7 @@ Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data)
else if (data == *"}")
self->topstack->context |= LC_FAIL_ON_RBRACE;
}
if (context & LC_HAS_TEXT) {
if (context & LC_FAIL_ON_TEXT) {
if (!Py_UNICODE_ISSPACE(data)) {
if (context & LC_TEMPLATE_PARAM_KEY) {
self->topstack->context ^= LC_FAIL_ON_TEXT;
self->topstack->context |= LC_FAIL_ON_EQUALS;
}
else
Tokenizer_fail_route(self);
return;
}
}
else {
if (data == *"\n")
self->topstack->context |= LC_FAIL_ON_TEXT;
}
}
else if (!Py_UNICODE_ISSPACE(data))
self->topstack->context |= LC_HAS_TEXT;
}

/*
Unset any safety-checking contexts set by Tokenizer_verify_safe(). Used
when we preserve a context but previous data becomes invalid, like when
moving between template parameters.
*/
static void
Tokenizer_reset_safety_checks(Tokenizer* self)
{
static int checks[] = {
LC_HAS_TEXT, LC_FAIL_ON_TEXT, LC_FAIL_NEXT, LC_FAIL_ON_LBRACE,
LC_FAIL_ON_RBRACE, LC_FAIL_ON_EQUALS, 0};
int context = self->topstack->context, i = 0, this;
while (1) {
this = checks[i];
if (!this)
return;
if (context & this)
self->topstack->context ^= this;
i++;
}
return 0;
}

/*
@@ -1258,12 +1230,12 @@ Tokenizer_parse(Tokenizer* self, int context)
this = Tokenizer_READ(self, 0);
this_context = self->topstack->context;
if (this_context & unsafe_contexts) {
Tokenizer_verify_safe(self, this_context, this);
if (BAD_ROUTE) {
if (Tokenizer_verify_safe(self, this_context, this) < 0) {
if (this_context & LC_TEMPLATE_PARAM_KEY) {
trash = Tokenizer_pop(self);
Py_XDECREF(trash);
}
Tokenizer_fail_route(self);
return NULL;
}
}
@@ -1303,7 +1275,6 @@ Tokenizer_parse(Tokenizer* self, int context)
self->topstack->context ^= LC_FAIL_NEXT;
}
else if (this == *"|" && this_context & LC_TEMPLATE) {
Tokenizer_reset_safety_checks(self);
if (Tokenizer_handle_template_param(self))
return NULL;
}
@@ -1324,10 +1295,14 @@ Tokenizer_parse(Tokenizer* self, int context)
Tokenizer_write_text(self, this);
}
else if (this == next && next == *"[") {
if (Tokenizer_parse_wikilink(self))
return NULL;
if (self->topstack->context & LC_FAIL_NEXT)
self->topstack->context ^= LC_FAIL_NEXT;
if (!(this_context & LC_WIKILINK_TITLE)) {
if (Tokenizer_parse_wikilink(self))
return NULL;
if (self->topstack->context & LC_FAIL_NEXT)
self->topstack->context ^= LC_FAIL_NEXT;
}
else
Tokenizer_write_text(self, this);
}
else if (this == *"|" && this_context & LC_WIKILINK_TITLE) {
if (Tokenizer_handle_wikilink_separator(self))
@@ -1401,7 +1376,8 @@ Tokenizer_tokenize(Tokenizer* self, PyObject* args)
PyMODINIT_FUNC
init_tokenizer(void)
{
PyObject *module, *tempmodule, *defmap, *deflist, *globals, *locals, *fromlist, *modname;
PyObject *module, *tempmod, *defmap, *deflist, *globals, *locals,
*fromlist, *modname;
unsigned numdefs, i;
char *name;

@@ -1411,14 +1387,16 @@ init_tokenizer(void)
module = Py_InitModule("_tokenizer", module_methods);
Py_INCREF(&TokenizerType);
PyModule_AddObject(module, "CTokenizer", (PyObject*) &TokenizerType);
Py_INCREF(Py_True);
PyDict_SetItemString(TokenizerType.tp_dict, "USES_C", Py_True);

tempmodule = PyImport_ImportModule("htmlentitydefs");
if (!tempmodule)
tempmod = PyImport_ImportModule("htmlentitydefs");
if (!tempmod)
return;
defmap = PyObject_GetAttrString(tempmodule, "entitydefs");
defmap = PyObject_GetAttrString(tempmod, "entitydefs");
if (!defmap)
return;
Py_DECREF(tempmodule);
Py_DECREF(tempmod);
deflist = PyDict_Keys(defmap);
if (!deflist)
return;
@@ -1442,18 +1420,20 @@ init_tokenizer(void)
if (!modname)
return;
PyList_SET_ITEM(fromlist, 0, modname);
tempmodule = PyImport_ImportModuleLevel(name, globals, locals, fromlist, 0);
tempmod = PyImport_ImportModuleLevel(name, globals, locals, fromlist, 0);
Py_DECREF(fromlist);
if (!tempmodule)
if (!tempmod)
return;
tokens = PyObject_GetAttrString(tempmodule, "tokens");
Py_DECREF(tempmodule);
tokens = PyObject_GetAttrString(tempmod, "tokens");
Py_DECREF(tempmod);

Text = PyObject_GetAttrString(tokens, "Text");

TemplateOpen = PyObject_GetAttrString(tokens, "TemplateOpen");
TemplateParamSeparator = PyObject_GetAttrString(tokens, "TemplateParamSeparator");
TemplateParamEquals = PyObject_GetAttrString(tokens, "TemplateParamEquals");
TemplateParamSeparator = PyObject_GetAttrString(tokens,
"TemplateParamSeparator");
TemplateParamEquals = PyObject_GetAttrString(tokens,
"TemplateParamEquals");
TemplateClose = PyObject_GetAttrString(tokens, "TemplateClose");

ArgumentOpen = PyObject_GetAttrString(tokens, "ArgumentOpen");


+ 7
- 3
mwparserfromhell/parser/tokenizer.h View File

@@ -1,6 +1,6 @@
/*
Tokenizer Header File for MWParserFromHell
Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
@@ -36,6 +36,10 @@ SOFTWARE.
#define malloc PyObject_Malloc
#define free PyObject_Free

#define DIGITS "0123456789"
#define HEXDIGITS "0123456789abcdefABCDEF"
#define ALPHANUM "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

static const char* MARKERS[] = {
"{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":", "/", "-",
"!", "\n", ""};
@@ -118,6 +122,7 @@ static PyObject* TagCloseClose;

#define LC_COMMENT 0x02000

#define LC_SAFETY_CHECK 0xFC000
#define LC_HAS_TEXT 0x04000
#define LC_FAIL_ON_TEXT 0x08000
#define LC_FAIL_NEXT 0x10000
@@ -205,8 +210,7 @@ static HeadingData* Tokenizer_handle_heading_end(Tokenizer*);
static int Tokenizer_really_parse_entity(Tokenizer*);
static int Tokenizer_parse_entity(Tokenizer*);
static int Tokenizer_parse_comment(Tokenizer*);
static void Tokenizer_verify_safe(Tokenizer*, int, Py_UNICODE);
static void Tokenizer_reset_safety_checks(Tokenizer*);
static int Tokenizer_verify_safe(Tokenizer*, int, Py_UNICODE);
static PyObject* Tokenizer_parse(Tokenizer*, int);
static PyObject* Tokenizer_tokenize(Tokenizer*, PyObject*);



+ 68
- 33
mwparserfromhell/parser/tokenizer.py View File

@@ -37,6 +37,7 @@ class BadRoute(Exception):

class Tokenizer(object):
"""Creates a list of tokens from a string of wikicode."""
USES_C = False
START = object()
END = object()
MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":",
@@ -212,28 +213,9 @@ class Tokenizer(object):
self._write_all(argument)
self._write(tokens.ArgumentClose())

def _verify_safe(self, unsafes, strip=True):
"""Verify that there are no unsafe characters in the current stack.

The route will be failed if the name contains any element of *unsafes*
in it. This is used when parsing template names, parameter keys, and so
on, which cannot contain newlines and some other characters. If *strip*
is ``True``, the text will be stripped of whitespace, since this is
allowed at the ends of certain elements but not between text.
"""
self._push_textbuffer()
if self._stack:
text = [tok for tok in self._stack if isinstance(tok, tokens.Text)]
text = "".join([token.text for token in text])
if strip:
text = text.strip()
if text and any([unsafe in text for unsafe in unsafes]):
self._fail_route()

def _handle_template_param(self):
"""Handle a template parameter at the head of the string."""
if self._context & contexts.TEMPLATE_NAME:
self._verify_safe(["\n", "{", "}", "[", "]"])
self._context ^= contexts.TEMPLATE_NAME
elif self._context & contexts.TEMPLATE_PARAM_VALUE:
self._context ^= contexts.TEMPLATE_PARAM_VALUE
@@ -245,11 +227,6 @@ class Tokenizer(object):

def _handle_template_param_value(self):
"""Handle a template parameter's value at the head of the string."""
try:
self._verify_safe(["\n", "{{", "}}"])
except BadRoute:
self._pop()
raise
self._write_all(self._pop(keep_context=True))
self._context ^= contexts.TEMPLATE_PARAM_KEY
self._context |= contexts.TEMPLATE_PARAM_VALUE
@@ -257,24 +234,19 @@ class Tokenizer(object):

def _handle_template_end(self):
"""Handle the end of a template at the head of the string."""
if self._context & contexts.TEMPLATE_NAME:
self._verify_safe(["\n", "{", "}", "[", "]"])
elif self._context & contexts.TEMPLATE_PARAM_KEY:
if self._context & contexts.TEMPLATE_PARAM_KEY:
self._write_all(self._pop(keep_context=True))
self._head += 1
return self._pop()

def _handle_argument_separator(self):
"""Handle the separator between an argument's name and default."""
self._verify_safe(["\n", "{{", "}}"])
self._context ^= contexts.ARGUMENT_NAME
self._context |= contexts.ARGUMENT_DEFAULT
self._write(tokens.ArgumentSeparator())

def _handle_argument_end(self):
"""Handle the end of an argument at the head of the string."""
if self._context & contexts.ARGUMENT_NAME:
self._verify_safe(["\n", "{{", "}}"])
self._head += 2
return self._pop()

@@ -294,15 +266,12 @@ class Tokenizer(object):

def _handle_wikilink_separator(self):
"""Handle the separator between a wikilink's title and its text."""
self._verify_safe(["\n", "{", "}", "[", "]"], strip=False)
self._context ^= contexts.WIKILINK_TITLE
self._context |= contexts.WIKILINK_TEXT
self._write(tokens.WikilinkSeparator())

def _handle_wikilink_end(self):
"""Handle the end of a wikilink at the head of the string."""
if self._context & contexts.WIKILINK_TITLE:
self._verify_safe(["\n", "{", "}", "[", "]"], strip=False)
self._head += 1
return self._pop()

@@ -423,11 +392,73 @@ class Tokenizer(object):
self._write(tokens.CommentEnd())
self._head += 2

def _verify_safe(self, this):
"""Make sure we are not trying to write an invalid character."""
context = self._context
if context & contexts.FAIL_NEXT:
return False
if context & contexts.WIKILINK_TITLE:
if this == "]" or this == "{":
self._context |= contexts.FAIL_NEXT
elif this == "\n" or this == "[" or this == "}":
return False
return True
if context & contexts.TEMPLATE_NAME:
if this == "{" or this == "}" or this == "[":
self._context |= contexts.FAIL_NEXT
return True
if this == "]":
return False
if this == "|":
return True
if context & contexts.HAS_TEXT:
if context & contexts.FAIL_ON_TEXT:
if this is self.END or not this.isspace():
return False
else:
if this == "\n":
self._context |= contexts.FAIL_ON_TEXT
elif this is not self.END or not this.isspace():
self._context |= contexts.HAS_TEXT
return True
else:
if context & contexts.FAIL_ON_EQUALS:
if this == "=":
return False
elif context & contexts.FAIL_ON_LBRACE:
if this == "{" or (self._read(-1) == self._read(-2) == "{"):
if context & contexts.TEMPLATE:
self._context |= contexts.FAIL_ON_EQUALS
else:
self._context |= contexts.FAIL_NEXT
return True
self._context ^= contexts.FAIL_ON_LBRACE
elif context & contexts.FAIL_ON_RBRACE:
if this == "}":
if context & contexts.TEMPLATE:
self._context |= contexts.FAIL_ON_EQUALS
else:
self._context |= contexts.FAIL_NEXT
return True
self._context ^= contexts.FAIL_ON_RBRACE
elif this == "{":
self._context |= contexts.FAIL_ON_LBRACE
elif this == "}":
self._context |= contexts.FAIL_ON_RBRACE
return True

def _parse(self, context=0):
"""Parse the wikicode string, using *context* for when to stop."""
self._push(context)
while True:
this = self._read()
unsafe = (contexts.TEMPLATE_NAME | contexts.WIKILINK_TITLE |
contexts.TEMPLATE_PARAM_KEY | contexts.ARGUMENT_NAME)
if self._context & unsafe:
if not self._verify_safe(this):
if self._context & contexts.TEMPLATE_PARAM_KEY:
self._pop()
self._fail_route()
if this not in self.MARKERS:
self._write_text(this)
self._head += 1
@@ -449,6 +480,8 @@ class Tokenizer(object):
self._write_text(this)
elif this == next == "{":
self._parse_template_or_argument()
if self._context & contexts.FAIL_NEXT:
self._context ^= contexts.FAIL_NEXT
elif this == "|" and self._context & contexts.TEMPLATE:
self._handle_template_param()
elif this == "=" and self._context & contexts.TEMPLATE_PARAM_KEY:
@@ -465,6 +498,8 @@ class Tokenizer(object):
elif this == next == "[":
if not self._context & contexts.WIKILINK_TITLE:
self._parse_wikilink()
if self._context & contexts.FAIL_NEXT:
self._context ^= contexts.FAIL_NEXT
else:
self._write_text("[")
elif this == "|" and self._context & contexts.WIKILINK_TITLE:


+ 133
- 44
mwparserfromhell/smart_list.py View File

@@ -41,8 +41,23 @@ def inheritdoc(method):
method.__doc__ = getattr(list, method.__name__).__doc__
return method

class _SliceNormalizerMixIn(object):
"""MixIn that provides a private method to normalize slices."""

class SmartList(list):
def _normalize_slice(self, key):
"""Return a slice equivalent to the input *key*, standardized."""
if key.start is not None:
start = (len(self) + key.start) if key.start < 0 else key.start
else:
start = 0
if key.stop is not None:
stop = (len(self) + key.stop) if key.stop < 0 else key.stop
else:
stop = maxsize
return slice(start, stop, key.step or 1)


class SmartList(_SliceNormalizerMixIn, list):
"""Implements the ``list`` interface with special handling of sublists.

When a sublist is created (by ``list[i:j]``), any changes made to this
@@ -76,7 +91,8 @@ class SmartList(list):
def __getitem__(self, key):
if not isinstance(key, slice):
return super(SmartList, self).__getitem__(key)
sliceinfo = [key.start or 0, key.stop or 0, key.step or 1]
key = self._normalize_slice(key)
sliceinfo = [key.start, key.stop, key.step]
child = _ListProxy(self, sliceinfo)
self._children[id(child)] = (child, sliceinfo)
return child
@@ -86,25 +102,28 @@ class SmartList(list):
return super(SmartList, self).__setitem__(key, item)
item = list(item)
super(SmartList, self).__setitem__(key, item)
diff = len(item) - key.stop + key.start
key = self._normalize_slice(key)
diff = len(item) + (key.start - key.stop) // key.step
values = self._children.values if py3k else self._children.itervalues
if diff:
for child, (start, stop, step) in values():
if start >= key.stop:
if start > key.stop:
self._children[id(child)][1][0] += diff
if stop >= key.stop and stop != maxsize:
self._children[id(child)][1][1] += diff

def __delitem__(self, key):
super(SmartList, self).__delitem__(key)
if not isinstance(key, slice):
key = slice(key, key + 1)
diff = key.stop - key.start
if isinstance(key, slice):
key = self._normalize_slice(key)
else:
key = slice(key, key + 1, 1)
diff = (key.stop - key.start) // key.step
values = self._children.values if py3k else self._children.itervalues
for child, (start, stop, step) in values():
if start > key.start:
self._children[id(child)][1][0] -= diff
if stop >= key.stop:
if stop >= key.stop and stop != maxsize:
self._children[id(child)][1][1] -= diff

if not py3k:
@@ -160,24 +179,35 @@ class SmartList(list):
child._parent = copy
super(SmartList, self).reverse()

@inheritdoc
def sort(self, cmp=None, key=None, reverse=None):
copy = list(self)
for child in self._children:
child._parent = copy
if cmp is not None:
if py3k:
@inheritdoc
def sort(self, key=None, reverse=None):
copy = list(self)
for child in self._children:
child._parent = copy
kwargs = {}
if key is not None:
if reverse is not None:
super(SmartList, self).sort(cmp, key, reverse)
else:
super(SmartList, self).sort(cmp, key)
else:
super(SmartList, self).sort(cmp)
else:
super(SmartList, self).sort()
kwargs["key"] = key
if reverse is not None:
kwargs["reverse"] = reverse
super(SmartList, self).sort(**kwargs)
else:
@inheritdoc
def sort(self, cmp=None, key=None, reverse=None):
copy = list(self)
for child in self._children:
child._parent = copy
kwargs = {}
if cmp is not None:
kwargs["cmp"] = cmp
if key is not None:
kwargs["key"] = key
if reverse is not None:
kwargs["reverse"] = reverse
super(SmartList, self).sort(**kwargs)


class _ListProxy(list):
class _ListProxy(_SliceNormalizerMixIn, list):
"""Implement the ``list`` interface by getting elements from a parent.

This is created by a :py:class:`~.SmartList` object when slicing. It does
@@ -231,25 +261,52 @@ class _ListProxy(list):
return bool(self._render())

def __len__(self):
return (self._stop - self._start) / self._step
return (self._stop - self._start) // self._step

def __getitem__(self, key):
return self._render()[key]
if isinstance(key, slice):
key = self._normalize_slice(key)
if key.stop == maxsize:
keystop = self._stop
else:
keystop = key.stop + self._start
adjusted = slice(key.start + self._start, keystop, key.step)
return self._parent[adjusted]
else:
return self._render()[key]

def __setitem__(self, key, item):
if isinstance(key, slice):
adjusted = slice(key.start + self._start, key.stop + self._stop,
key.step)
key = self._normalize_slice(key)
if key.stop == maxsize:
keystop = self._stop
else:
keystop = key.stop + self._start
adjusted = slice(key.start + self._start, keystop, key.step)
self._parent[adjusted] = item
else:
length = len(self)
if key < 0:
key = length + key
if key < 0 or key >= length:
raise IndexError("list assignment index out of range")
self._parent[self._start + key] = item

def __delitem__(self, key):
if isinstance(key, slice):
adjusted = slice(key.start + self._start, key.stop + self._stop,
key.step)
key = self._normalize_slice(key)
if key.stop == maxsize:
keystop = self._stop
else:
keystop = key.stop + self._start
adjusted = slice(key.start + self._start, keystop, key.step)
del self._parent[adjusted]
else:
length = len(self)
if key < 0:
key = length + key
if key < 0 or key >= length:
raise IndexError("list assignment index out of range")
del self._parent[self._start + key]

def __iter__(self):
@@ -287,6 +344,16 @@ class _ListProxy(list):
self.extend(other)
return self

def __mul__(self, other):
return SmartList(list(self) * other)

def __rmul__(self, other):
return SmartList(other * list(self))

def __imul__(self, other):
self.extend(list(self) * (other - 1))
return self

@property
def _start(self):
"""The starting index of this list, inclusive."""
@@ -295,6 +362,8 @@ class _ListProxy(list):
@property
def _stop(self):
"""The ending index of this list, exclusive."""
if self._sliceinfo[1] == maxsize:
return len(self._parent)
return self._sliceinfo[1]

@property
@@ -328,18 +397,25 @@ class _ListProxy(list):

@inheritdoc
def insert(self, index, item):
if index < 0:
index = len(self) + index
self._parent.insert(self._start + index, item)

@inheritdoc
def pop(self, index=None):
length = len(self)
if index is None:
index = len(self) - 1
index = length - 1
elif index < 0:
index = length + index
if index < 0 or index >= length:
raise IndexError("pop index out of range")
return self._parent.pop(self._start + index)

@inheritdoc
def remove(self, item):
index = self.index(item)
del self._parent[index]
del self._parent[self._start + index]

@inheritdoc
def reverse(self):
@@ -347,17 +423,30 @@ class _ListProxy(list):
item.reverse()
self._parent[self._start:self._stop:self._step] = item

@inheritdoc
def sort(self, cmp=None, key=None, reverse=None):
item = self._render()
if cmp is not None:
if py3k:
@inheritdoc
def sort(self, key=None, reverse=None):
item = self._render()
kwargs = {}
if key is not None:
if reverse is not None:
item.sort(cmp, key, reverse)
else:
item.sort(cmp, key)
else:
item.sort(cmp)
else:
item.sort()
self._parent[self._start:self._stop:self._step] = item
kwargs["key"] = key
if reverse is not None:
kwargs["reverse"] = reverse
item.sort(**kwargs)
self._parent[self._start:self._stop:self._step] = item
else:
@inheritdoc
def sort(self, cmp=None, key=None, reverse=None):
item = self._render()
kwargs = {}
if cmp is not None:
kwargs["cmp"] = cmp
if key is not None:
kwargs["key"] = key
if reverse is not None:
kwargs["reverse"] = reverse
item.sort(**kwargs)
self._parent[self._start:self._stop:self._step] = item


del inheritdoc

+ 82
- 25
mwparserfromhell/string_mixin.py View File

@@ -114,6 +114,9 @@ class StringMixIn(object):
def __getitem__(self, key):
return self.__unicode__()[key]

def __reversed__(self):
return reversed(self.__unicode__())

def __contains__(self, item):
if isinstance(item, StringMixIn):
return str(item) in self.__unicode__()
@@ -123,6 +126,11 @@ class StringMixIn(object):
def capitalize(self):
return self.__unicode__().capitalize()

if py3k:
@inheritdoc
def casefold(self):
return self.__unicode__().casefold()

@inheritdoc
def center(self, width, fillchar=None):
if fillchar is None:
@@ -136,19 +144,21 @@ class StringMixIn(object):
if not py3k:
@inheritdoc
def decode(self, encoding=None, errors=None):
if errors is None:
if encoding is None:
return self.__unicode__().decode()
return self.__unicode__().decode(encoding)
return self.__unicode__().decode(encoding, errors)
kwargs = {}
if encoding is not None:
kwargs["encoding"] = encoding
if errors is not None:
kwargs["errors"] = errors
return self.__unicode__().decode(**kwargs)

@inheritdoc
def encode(self, encoding=None, errors=None):
if errors is None:
if encoding is None:
return self.__unicode__().encode()
return self.__unicode__().encode(encoding)
return self.__unicode__().encode(encoding, errors)
kwargs = {}
if encoding is not None:
kwargs["encoding"] = encoding
if errors is not None:
kwargs["errors"] = errors
return self.__unicode__().encode(**kwargs)

@inheritdoc
def endswith(self, prefix, start=None, end=None):
@@ -168,6 +178,11 @@ class StringMixIn(object):
def format(self, *args, **kwargs):
return self.__unicode__().format(*args, **kwargs)

if py3k:
@inheritdoc
def format_map(self, mapping):
return self.__unicode__().format_map(mapping)

@inheritdoc
def index(self, sub, start=None, end=None):
return self.__unicode__().index(sub, start, end)
@@ -188,6 +203,11 @@ class StringMixIn(object):
def isdigit(self):
return self.__unicode__().isdigit()

if py3k:
@inheritdoc
def isidentifier(self):
return self.__unicode__().isidentifier()

@inheritdoc
def islower(self):
return self.__unicode__().islower()
@@ -196,6 +216,11 @@ class StringMixIn(object):
def isnumeric(self):
return self.__unicode__().isnumeric()

if py3k:
@inheritdoc
def isprintable(self):
return self.__unicode__().isprintable()

@inheritdoc
def isspace(self):
return self.__unicode__().isspace()
@@ -226,12 +251,24 @@ class StringMixIn(object):
def lstrip(self, chars=None):
return self.__unicode__().lstrip(chars)

if py3k:
@staticmethod
@inheritdoc
def maketrans(self, x, y=None, z=None):
if z is None:
if y is None:
return self.__unicode__.maketrans(x)
return self.__unicode__.maketrans(x, y)
return self.__unicode__.maketrans(x, y, z)

@inheritdoc
def partition(self, sep):
return self.__unicode__().partition(sep)

@inheritdoc
def replace(self, old, new, count):
def replace(self, old, new, count=None):
if count is None:
return self.__unicode__().replace(old, new)
return self.__unicode__().replace(old, new, count)

@inheritdoc
@@ -252,25 +289,45 @@ class StringMixIn(object):
def rpartition(self, sep):
return self.__unicode__().rpartition(sep)

@inheritdoc
def rsplit(self, sep=None, maxsplit=None):
if maxsplit is None:
if sep is None:
return self.__unicode__().rsplit()
return self.__unicode__().rsplit(sep)
return self.__unicode__().rsplit(sep, maxsplit)
if py3k:
@inheritdoc
def rsplit(self, sep=None, maxsplit=None):
kwargs = {}
if sep is not None:
kwargs["sep"] = sep
if maxsplit is not None:
kwargs["maxsplit"] = maxsplit
return self.__unicode__().rsplit(**kwargs)
else:
@inheritdoc
def rsplit(self, sep=None, maxsplit=None):
if maxsplit is None:
if sep is None:
return self.__unicode__().rsplit()
return self.__unicode__().rsplit(sep)
return self.__unicode__().rsplit(sep, maxsplit)

@inheritdoc
def rstrip(self, chars=None):
return self.__unicode__().rstrip(chars)

@inheritdoc
def split(self, sep=None, maxsplit=None):
if maxsplit is None:
if sep is None:
return self.__unicode__().split()
return self.__unicode__().split(sep)
return self.__unicode__().split(sep, maxsplit)
if py3k:
@inheritdoc
def split(self, sep=None, maxsplit=None):
kwargs = {}
if sep is not None:
kwargs["sep"] = sep
if maxsplit is not None:
kwargs["maxsplit"] = maxsplit
return self.__unicode__().split(**kwargs)
else:
@inheritdoc
def split(self, sep=None, maxsplit=None):
if maxsplit is None:
if sep is None:
return self.__unicode__().split()
return self.__unicode__().split(sep)
return self.__unicode__().split(sep, maxsplit)

@inheritdoc
def splitlines(self, keepends=None):


+ 2
- 1
setup.py View File

@@ -24,6 +24,7 @@
from setuptools import setup, find_packages, Extension

from mwparserfromhell import __version__
from mwparserfromhell.compat import py3k

with open("README.rst") as fp:
long_docs = fp.read()
@@ -37,7 +38,7 @@ tokenizer = Extension("mwparserfromhell.parser._tokenizer",
setup(
name = "mwparserfromhell",
packages = find_packages(exclude=("tests",)),
ext_modules = [tokenizer],
ext_modules = [] if py3k else [tokenizer],
test_suite = "tests",
version = __version__,
author = "Ben Kurtovic",


+ 130
- 0
tests/MWPFHTestCase.tmlanguage View File

@@ -0,0 +1,130 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>fileTypes</key>
<array>
<string>mwtest</string>
</array>
<key>name</key>
<string>MWParserFromHell Test Case</string>
<key>patterns</key>
<array>
<dict>
<key>match</key>
<string>---</string>
<key>name</key>
<string>markup.heading.divider.mwpfh</string>
</dict>
<dict>
<key>captures</key>
<dict>
<key>1</key>
<dict>
<key>name</key>
<string>keyword.other.name.mwpfh</string>
</dict>
<key>2</key>
<dict>
<key>name</key>
<string>variable.other.name.mwpfh</string>
</dict>
</dict>
<key>match</key>
<string>(name:)\s*(\w*)</string>
<key>name</key>
<string>meta.name.mwpfh</string>
</dict>
<dict>
<key>captures</key>
<dict>
<key>1</key>
<dict>
<key>name</key>
<string>keyword.other.label.mwpfh</string>
</dict>
<key>2</key>
<dict>
<key>name</key>
<string>comment.line.other.label.mwpfh</string>
</dict>
</dict>
<key>match</key>
<string>(label:)\s*(.*)</string>
<key>name</key>
<string>meta.label.mwpfh</string>
</dict>
<dict>
<key>captures</key>
<dict>
<key>1</key>
<dict>
<key>name</key>
<string>keyword.other.input.mwpfh</string>
</dict>
<key>2</key>
<dict>
<key>name</key>
<string>string.quoted.double.input.mwpfh</string>
</dict>
</dict>
<key>match</key>
<string>(input:)\s*(.*)</string>
<key>name</key>
<string>meta.input.mwpfh</string>
</dict>
<dict>
<key>captures</key>
<dict>
<key>1</key>
<dict>
<key>name</key>
<string>keyword.other.output.mwpfh</string>
</dict>
</dict>
<key>match</key>
<string>(output:)</string>
<key>name</key>
<string>meta.output.mwpfh</string>
</dict>
<dict>
<key>captures</key>
<dict>
<key>1</key>
<dict>
<key>name</key>
<string>support.language.token.mwpfh</string>
</dict>
</dict>
<key>match</key>
<string>(\w+)\s*\(</string>
<key>name</key>
<string>meta.name.token.mwpfh</string>
</dict>
<dict>
<key>captures</key>
<dict>
<key>1</key>
<dict>
<key>name</key>
<string>variable.parameter.token.mwpfh</string>
</dict>
</dict>
<key>match</key>
<string>(\w+)\s*(=)</string>
<key>name</key>
<string>meta.name.parameter.token.mwpfh</string>
</dict>
<dict>
<key>match</key>
<string>".*?"</string>
<key>name</key>
<string>string.quoted.double.mwpfh</string>
</dict>
</array>
<key>scopeName</key>
<string>text.mwpfh</string>
<key>uuid</key>
<string>cd3e2ffa-a57d-4c40-954f-1a2e87ffd638</string>
</dict>
</plist>

+ 121
- 0
tests/_test_tokenizer.py View File

@@ -0,0 +1,121 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from __future__ import print_function, unicode_literals
from os import listdir, path

from mwparserfromhell.compat import py3k
from mwparserfromhell.parser import tokens

class _TestParseError(Exception):
"""Raised internally when a test could not be parsed."""
pass


class TokenizerTestCase(object):
"""A base test case for tokenizers, whose tests are loaded dynamically.

Subclassed along with unittest.TestCase to form TestPyTokenizer and
TestCTokenizer. Tests are loaded dynamically from files in the 'tokenizer'
directory.
"""

@classmethod
def _build_test_method(cls, funcname, data):
"""Create and return a method to be treated as a test case method.

*data* is a dict containing multiple keys: the *input* text to be
tokenized, the expected list of tokens as *output*, and an optional
*label* for the method's docstring.
"""
def inner(self):
expected = data["output"]
actual = self.tokenizer().tokenize(data["input"])
self.assertEqual(expected, actual)
if not py3k:
inner.__name__ = funcname.encode("utf8")
inner.__doc__ = data["label"]
return inner

@classmethod
def _load_tests(cls, filename, text):
"""Load all tests in *text* from the file *filename*."""
tests = text.split("\n---\n")
counter = 1
digits = len(str(len(tests)))
for test in tests:
data = {"name": None, "label": None, "input": None, "output": None}
try:
for line in test.strip().splitlines():
if line.startswith("name:"):
data["name"] = line[len("name:"):].strip()
elif line.startswith("label:"):
data["label"] = line[len("label:"):].strip()
elif line.startswith("input:"):
raw = line[len("input:"):].strip()
if raw[0] == '"' and raw[-1] == '"':
raw = raw[1:-1]
raw = raw.encode("raw_unicode_escape")
data["input"] = raw.decode("unicode_escape")
elif line.startswith("output:"):
raw = line[len("output:"):].strip()
try:
data["output"] = eval(raw, vars(tokens))
except Exception as err:
raise _TestParseError(err)
except _TestParseError as err:
if data["name"]:
error = "Could not parse test '{0}' in '{1}':\n\t{2}"
print(error.format(data["name"], filename, err))
else:
error = "Could not parse a test in '{0}':\n\t{1}"
print(error.format(filename, err))
continue
if not data["name"]:
error = "A test in '{0}' was ignored because it lacked a name"
print(error.format(filename))
continue
if data["input"] is None or data["output"] is None:
error = "Test '{0}' in '{1}' was ignored because it lacked an input or an output"
print(error.format(data["name"], filename))
continue
number = str(counter).zfill(digits)
fname = "test_{0}{1}_{2}".format(filename, number, data["name"])
meth = cls._build_test_method(fname, data)
setattr(cls, fname, meth)
counter += 1

@classmethod
def build(cls):
"""Load and install all tests from the 'tokenizer' directory."""
directory = path.join(path.dirname(__file__), "tokenizer")
extension = ".mwtest"
for filename in listdir(directory):
if not filename.endswith(extension):
continue
with open(path.join(directory, filename), "r") as fp:
text = fp.read()
if not py3k:
text = text.decode("utf8")
cls._load_tests(filename[:0-len(extension)], text)

TokenizerTestCase.build()

+ 113
- 0
tests/_test_tree_equality.py View File

@@ -0,0 +1,113 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from __future__ import unicode_literals
from unittest import TestCase

from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity,
Tag, Template, Text, Wikilink)
from mwparserfromhell.nodes.extras import Attribute, Parameter
from mwparserfromhell.wikicode import Wikicode

class TreeEqualityTestCase(TestCase):
"""A base test case with support for comparing the equality of node trees.

This adds a number of type equality functions, for Wikicode, Text,
Templates, and Wikilinks.
"""

def assertNodeEqual(self, expected, actual):
"""Assert that two Nodes have the same type and have the same data."""
registry = {
Argument: self.assertArgumentNodeEqual,
Comment: self.assertCommentNodeEqual,
Heading: self.assertHeadingNodeEqual,
HTMLEntity: self.assertHTMLEntityNodeEqual,
Tag: self.assertTagNodeEqual,
Template: self.assertTemplateNodeEqual,
Text: self.assertTextNodeEqual,
Wikilink: self.assertWikilinkNodeEqual
}
for nodetype in registry:
if isinstance(expected, nodetype):
self.assertIsInstance(actual, nodetype)
registry[nodetype](expected, actual)

def assertArgumentNodeEqual(self, expected, actual):
"""Assert that two Argument nodes have the same data."""
self.assertWikicodeEqual(expected.name, actual.name)
if expected.default is not None:
self.assertWikicodeEqual(expected.default, actual.default)
else:
self.assertIs(None, actual.default)

def assertCommentNodeEqual(self, expected, actual):
"""Assert that two Comment nodes have the same data."""
self.assertWikicodeEqual(expected.contents, actual.contents)

def assertHeadingNodeEqual(self, expected, actual):
"""Assert that two Heading nodes have the same data."""
self.assertWikicodeEqual(expected.title, actual.title)
self.assertEqual(expected.level, actual.level)

def assertHTMLEntityNodeEqual(self, expected, actual):
"""Assert that two HTMLEntity nodes have the same data."""
self.assertEqual(expected.value, actual.value)
self.assertIs(expected.named, actual.named)
self.assertIs(expected.hexadecimal, actual.hexadecimal)
self.assertEqual(expected.hex_char, actual.hex_char)

def assertTagNodeEqual(self, expected, actual):
"""Assert that two Tag nodes have the same data."""
self.fail("Holding this until feature/html_tags is ready.")

def assertTemplateNodeEqual(self, expected, actual):
"""Assert that two Template nodes have the same data."""
self.assertWikicodeEqual(expected.name, actual.name)
length = len(expected.params)
self.assertEqual(length, len(actual.params))
for i in range(length):
exp_param = expected.params[i]
act_param = actual.params[i]
self.assertWikicodeEqual(exp_param.name, act_param.name)
self.assertWikicodeEqual(exp_param.value, act_param.value)
self.assertIs(exp_param.showkey, act_param.showkey)

def assertTextNodeEqual(self, expected, actual):
"""Assert that two Text nodes have the same data."""
self.assertEqual(expected.value, actual.value)

def assertWikilinkNodeEqual(self, expected, actual):
"""Assert that two Wikilink nodes have the same data."""
self.assertWikicodeEqual(expected.title, actual.title)
if expected.text is not None:
self.assertWikicodeEqual(expected.text, actual.text)
else:
self.assertIs(None, actual.text)

def assertWikicodeEqual(self, expected, actual):
"""Assert that two Wikicode objects have the same data."""
self.assertIsInstance(actual, Wikicode)
length = len(expected.nodes)
self.assertEqual(length, len(actual.nodes))
for i in range(length):
self.assertNodeEqual(expected.get(i), actual.get(i))

+ 20
- 0
tests/compat.py View File

@@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-

"""
Serves the same purpose as mwparserfromhell.compat, but only for objects
required by unit tests. This avoids unnecessary imports (like urllib) within
the main library.
"""

from mwparserfromhell.compat import py3k

if py3k:
range = range
from io import StringIO
from urllib.parse import urlencode
from urllib.request import urlopen

else:
range = xrange
from StringIO import StringIO
from urllib import urlencode, urlopen

+ 261
- 0
tests/test_builder.py View File

@@ -0,0 +1,261 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from __future__ import unicode_literals
import unittest

from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity,
Tag, Template, Text, Wikilink)
from mwparserfromhell.nodes.extras import Attribute, Parameter
from mwparserfromhell.parser import tokens
from mwparserfromhell.parser.builder import Builder
from mwparserfromhell.smart_list import SmartList
from mwparserfromhell.wikicode import Wikicode

from ._test_tree_equality import TreeEqualityTestCase

wrap = lambda L: Wikicode(SmartList(L))

class TestBuilder(TreeEqualityTestCase):
"""Tests for the builder, which turns tokens into Wikicode objects."""

def setUp(self):
self.builder = Builder()

def test_text(self):
"""tests for building Text nodes"""
tests = [
([tokens.Text(text="foobar")], wrap([Text("foobar")])),
([tokens.Text(text="fóóbar")], wrap([Text("fóóbar")])),
([tokens.Text(text="spam"), tokens.Text(text="eggs")],
wrap([Text("spam"), Text("eggs")])),
]
for test, valid in tests:
self.assertWikicodeEqual(valid, self.builder.build(test))

def test_template(self):
"""tests for building Template nodes"""
tests = [
([tokens.TemplateOpen(), tokens.Text(text="foobar"),
tokens.TemplateClose()],
wrap([Template(wrap([Text("foobar")]))])),

([tokens.TemplateOpen(), tokens.Text(text="spam"),
tokens.Text(text="eggs"), tokens.TemplateClose()],
wrap([Template(wrap([Text("spam"), Text("eggs")]))])),

([tokens.TemplateOpen(), tokens.Text(text="foo"),
tokens.TemplateParamSeparator(), tokens.Text(text="bar"),
tokens.TemplateClose()],
wrap([Template(wrap([Text("foo")]), params=[
Parameter(wrap([Text("1")]), wrap([Text("bar")]),
showkey=False)])])),

([tokens.TemplateOpen(), tokens.Text(text="foo"),
tokens.TemplateParamSeparator(), tokens.Text(text="bar"),
tokens.TemplateParamEquals(), tokens.Text(text="baz"),
tokens.TemplateClose()],
wrap([Template(wrap([Text("foo")]), params=[
Parameter(wrap([Text("bar")]), wrap([Text("baz")]))])])),

([tokens.TemplateOpen(), tokens.Text(text="foo"),
tokens.TemplateParamSeparator(), tokens.Text(text="bar"),
tokens.TemplateParamEquals(), tokens.Text(text="baz"),
tokens.TemplateParamSeparator(), tokens.Text(text="biz"),
tokens.TemplateParamSeparator(), tokens.Text(text="buzz"),
tokens.TemplateParamSeparator(), tokens.Text(text="3"),
tokens.TemplateParamEquals(), tokens.Text(text="buff"),
tokens.TemplateParamSeparator(), tokens.Text(text="baff"),
tokens.TemplateClose()],
wrap([Template(wrap([Text("foo")]), params=[
Parameter(wrap([Text("bar")]), wrap([Text("baz")])),
Parameter(wrap([Text("1")]), wrap([Text("biz")]),
showkey=False),
Parameter(wrap([Text("2")]), wrap([Text("buzz")]),
showkey=False),
Parameter(wrap([Text("3")]), wrap([Text("buff")])),
Parameter(wrap([Text("3")]), wrap([Text("baff")]),
showkey=False)])])),
]
for test, valid in tests:
self.assertWikicodeEqual(valid, self.builder.build(test))

def test_argument(self):
"""tests for building Argument nodes"""
tests = [
([tokens.ArgumentOpen(), tokens.Text(text="foobar"),
tokens.ArgumentClose()],
wrap([Argument(wrap([Text("foobar")]))])),

([tokens.ArgumentOpen(), tokens.Text(text="spam"),
tokens.Text(text="eggs"), tokens.ArgumentClose()],
wrap([Argument(wrap([Text("spam"), Text("eggs")]))])),

([tokens.ArgumentOpen(), tokens.Text(text="foo"),
tokens.ArgumentSeparator(), tokens.Text(text="bar"),
tokens.ArgumentClose()],
wrap([Argument(wrap([Text("foo")]), wrap([Text("bar")]))])),

([tokens.ArgumentOpen(), tokens.Text(text="foo"),
tokens.Text(text="bar"), tokens.ArgumentSeparator(),
tokens.Text(text="baz"), tokens.Text(text="biz"),
tokens.ArgumentClose()],
wrap([Argument(wrap([Text("foo"), Text("bar")]),
wrap([Text("baz"), Text("biz")]))])),
]
for test, valid in tests:
self.assertWikicodeEqual(valid, self.builder.build(test))

def test_wikilink(self):
"""tests for building Wikilink nodes"""
tests = [
([tokens.WikilinkOpen(), tokens.Text(text="foobar"),
tokens.WikilinkClose()],
wrap([Wikilink(wrap([Text("foobar")]))])),

([tokens.WikilinkOpen(), tokens.Text(text="spam"),
tokens.Text(text="eggs"), tokens.WikilinkClose()],
wrap([Wikilink(wrap([Text("spam"), Text("eggs")]))])),

([tokens.WikilinkOpen(), tokens.Text(text="foo"),
tokens.WikilinkSeparator(), tokens.Text(text="bar"),
tokens.WikilinkClose()],
wrap([Wikilink(wrap([Text("foo")]), wrap([Text("bar")]))])),

([tokens.WikilinkOpen(), tokens.Text(text="foo"),
tokens.Text(text="bar"), tokens.WikilinkSeparator(),
tokens.Text(text="baz"), tokens.Text(text="biz"),
tokens.WikilinkClose()],
wrap([Wikilink(wrap([Text("foo"), Text("bar")]),
wrap([Text("baz"), Text("biz")]))])),
]
for test, valid in tests:
self.assertWikicodeEqual(valid, self.builder.build(test))

def test_html_entity(self):
"""tests for building HTMLEntity nodes"""
tests = [
([tokens.HTMLEntityStart(), tokens.Text(text="nbsp"),
tokens.HTMLEntityEnd()],
wrap([HTMLEntity("nbsp", named=True, hexadecimal=False)])),

([tokens.HTMLEntityStart(), tokens.HTMLEntityNumeric(),
tokens.Text(text="107"), tokens.HTMLEntityEnd()],
wrap([HTMLEntity("107", named=False, hexadecimal=False)])),

([tokens.HTMLEntityStart(), tokens.HTMLEntityNumeric(),
tokens.HTMLEntityHex(char="X"), tokens.Text(text="6B"),
tokens.HTMLEntityEnd()],
wrap([HTMLEntity("6B", named=False, hexadecimal=True,
hex_char="X")])),
]
for test, valid in tests:
self.assertWikicodeEqual(valid, self.builder.build(test))

def test_heading(self):
"""tests for building Heading nodes"""
tests = [
([tokens.HeadingStart(level=2), tokens.Text(text="foobar"),
tokens.HeadingEnd()],
wrap([Heading(wrap([Text("foobar")]), 2)])),

([tokens.HeadingStart(level=4), tokens.Text(text="spam"),
tokens.Text(text="eggs"), tokens.HeadingEnd()],
wrap([Heading(wrap([Text("spam"), Text("eggs")]), 4)])),
]
for test, valid in tests:
self.assertWikicodeEqual(valid, self.builder.build(test))

def test_comment(self):
"""tests for building Comment nodes"""
tests = [
([tokens.CommentStart(), tokens.Text(text="foobar"),
tokens.CommentEnd()],
wrap([Comment(wrap([Text("foobar")]))])),

([tokens.CommentStart(), tokens.Text(text="spam"),
tokens.Text(text="eggs"), tokens.CommentEnd()],
wrap([Comment(wrap([Text("spam"), Text("eggs")]))])),
]
for test, valid in tests:
self.assertWikicodeEqual(valid, self.builder.build(test))

@unittest.skip("holding this until feature/html_tags is ready")
def test_tag(self):
"""tests for building Tag nodes"""
pass

def test_integration(self):
"""a test for building a combination of templates together"""
# {{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}}
test = [tokens.TemplateOpen(), tokens.TemplateOpen(),
tokens.TemplateOpen(), tokens.TemplateOpen(),
tokens.Text(text="foo"), tokens.TemplateClose(),
tokens.Text(text="bar"), tokens.TemplateParamSeparator(),
tokens.Text(text="baz"), tokens.TemplateParamEquals(),
tokens.Text(text="biz"), tokens.TemplateClose(),
tokens.Text(text="buzz"), tokens.TemplateClose(),
tokens.Text(text="usr"), tokens.TemplateParamSeparator(),
tokens.TemplateOpen(), tokens.Text(text="bin"),
tokens.TemplateClose(), tokens.TemplateClose()]
valid = wrap(
[Template(wrap([Template(wrap([Template(wrap([Template(wrap([Text(
"foo")])), Text("bar")]), params=[Parameter(wrap([Text("baz")]),
wrap([Text("biz")]))]), Text("buzz")])), Text("usr")]), params=[
Parameter(wrap([Text("1")]), wrap([Template(wrap([Text("bin")]))]),
showkey=False)])])
self.assertWikicodeEqual(valid, self.builder.build(test))

def test_integration2(self):
"""an even more audacious test for building a horrible wikicode mess"""
# {{a|b|{{c|[[d]]{{{e}}}}}}}[[f|{{{g}}}<!--h-->]]{{i|j=&nbsp;}}
test = [tokens.TemplateOpen(), tokens.Text(text="a"),
tokens.TemplateParamSeparator(), tokens.Text(text="b"),
tokens.TemplateParamSeparator(), tokens.TemplateOpen(),
tokens.Text(text="c"), tokens.TemplateParamSeparator(),
tokens.WikilinkOpen(), tokens.Text(text="d"),
tokens.WikilinkClose(), tokens.ArgumentOpen(),
tokens.Text(text="e"), tokens.ArgumentClose(),
tokens.TemplateClose(), tokens.TemplateClose(),
tokens.WikilinkOpen(), tokens.Text(text="f"),
tokens.WikilinkSeparator(), tokens.ArgumentOpen(),
tokens.Text(text="g"), tokens.ArgumentClose(),
tokens.CommentStart(), tokens.Text(text="h"),
tokens.CommentEnd(), tokens.WikilinkClose(),
tokens.TemplateOpen(), tokens.Text(text="i"),
tokens.TemplateParamSeparator(), tokens.Text(text="j"),
tokens.TemplateParamEquals(), tokens.HTMLEntityStart(),
tokens.Text(text="nbsp"), tokens.HTMLEntityEnd(),
tokens.TemplateClose()]
valid = wrap(
[Template(wrap([Text("a")]), params=[Parameter(wrap([Text("1")]),
wrap([Text("b")]), showkey=False), Parameter(wrap([Text("2")]),
wrap([Template(wrap([Text("c")]), params=[Parameter(wrap([Text("1")
]), wrap([Wikilink(wrap([Text("d")])), Argument(wrap([Text("e")]))]
), showkey=False)])]), showkey=False)]), Wikilink(wrap([Text("f")]
), wrap([Argument(wrap([Text("g")])), Comment(wrap([Text("h")]))])
), Template(wrap([Text("i")]), params=[Parameter(wrap([Text("j")]),
wrap([HTMLEntity("nbsp", named=True)]))])])
self.assertWikicodeEqual(valid, self.builder.build(test))

if __name__ == "__main__":
unittest.main(verbosity=2)

+ 47
- 0
tests/test_ctokenizer.py View File

@@ -0,0 +1,47 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from __future__ import unicode_literals
import unittest

try:
from mwparserfromhell.parser._tokenizer import CTokenizer
except ImportError:
CTokenizer = None

from ._test_tokenizer import TokenizerTestCase

@unittest.skipUnless(CTokenizer, "C tokenizer not available")
class TestCTokenizer(TokenizerTestCase, unittest.TestCase):
"""Test cases for the C tokenizer."""

@classmethod
def setUpClass(cls):
cls.tokenizer = CTokenizer

def test_uses_c(self):
"""make sure the C tokenizer identifies as using a C extension"""
self.assertTrue(CTokenizer.USES_C)
self.assertTrue(CTokenizer().USES_C)

if __name__ == "__main__":
unittest.main(verbosity=2)

+ 131
- 0
tests/test_docs.py View File

@@ -0,0 +1,131 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from __future__ import print_function, unicode_literals
import json
import unittest

import mwparserfromhell
from mwparserfromhell.compat import py3k, str

from .compat import StringIO, urlencode, urlopen

class TestDocs(unittest.TestCase):
"""Integration test cases for mwparserfromhell's documentation."""

def assertPrint(self, input, output):
"""Assertion check that *input*, when printed, produces *output*."""
buff = StringIO()
print(input, end="", file=buff)
buff.seek(0)
self.assertEqual(output, buff.read())

def test_readme_1(self):
"""test a block of example code in the README"""
text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?"
wikicode = mwparserfromhell.parse(text)
self.assertPrint(wikicode,
"I has a template! {{foo|bar|baz|eggs=spam}} See it?")
templates = wikicode.filter_templates()
if py3k:
self.assertPrint(templates, "['{{foo|bar|baz|eggs=spam}}']")
else:
self.assertPrint(templates, "[u'{{foo|bar|baz|eggs=spam}}']")
template = templates[0]
self.assertPrint(template.name, "foo")
if py3k:
self.assertPrint(template.params, "['bar', 'baz', 'eggs=spam']")
else:
self.assertPrint(template.params, "[u'bar', u'baz', u'eggs=spam']")
self.assertPrint(template.get(1).value, "bar")
self.assertPrint(template.get("eggs").value, "spam")

def test_readme_2(self):
"""test a block of example code in the README"""
code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}")
if py3k:
self.assertPrint(code.filter_templates(),
"['{{foo|this {{includes a|template}}}}']")
else:
self.assertPrint(code.filter_templates(),
"[u'{{foo|this {{includes a|template}}}}']")
foo = code.filter_templates()[0]
self.assertPrint(foo.get(1).value, "this {{includes a|template}}")
self.assertPrint(foo.get(1).value.filter_templates()[0],
"{{includes a|template}}")
self.assertPrint(foo.get(1).value.filter_templates()[0].get(1).value,
"template")

def test_readme_3(self):
"""test a block of example code in the README"""
text = "{{foo|{{bar}}={{baz|{{spam}}}}}}"
temps = mwparserfromhell.parse(text).filter_templates(recursive=True)
if py3k:
res = "['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', '{{spam}}']"
else:
res = "[u'{{foo|{{bar}}={{baz|{{spam}}}}}}', u'{{bar}}', u'{{baz|{{spam}}}}', u'{{spam}}']"
self.assertPrint(temps, res)

def test_readme_4(self):
"""test a block of example code in the README"""
text = "{{cleanup}} '''Foo''' is a [[bar]]. {{uncategorized}}"
code = mwparserfromhell.parse(text)
for template in code.filter_templates():
if template.name == "cleanup" and not template.has_param("date"):
template.add("date", "July 2012")
res = "{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{uncategorized}}"
self.assertPrint(code, res)
code.replace("{{uncategorized}}", "{{bar-stub}}")
res = "{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}}"
self.assertPrint(code, res)
if py3k:
res = "['{{cleanup|date=July 2012}}', '{{bar-stub}}']"
else:
res = "[u'{{cleanup|date=July 2012}}', u'{{bar-stub}}']"
self.assertPrint(code.filter_templates(), res)
text = str(code)
res = "{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}}"
self.assertPrint(text, res)
self.assertEqual(text, code)

def test_readme_5(self):
"""test a block of example code in the README; includes a web call"""
url1 = "http://en.wikipedia.org/w/api.php"
url2 = "http://en.wikipedia.org/w/index.php?title={0}&action=raw"
title = "Test"
data = {"action": "query", "prop": "revisions", "rvlimit": 1,
"rvprop": "content", "format": "json", "titles": title}
try:
raw = urlopen(url1, urlencode(data).encode("utf8")).read()
except IOError:
self.skipTest("cannot continue because of unsuccessful web call")
res = json.loads(raw.decode("utf8"))
text = list(res["query"]["pages"].values())[0]["revisions"][0]["*"]
try:
expected = urlopen(url2.format(title)).read().decode("utf8")
except IOError:
self.skipTest("cannot continue because of unsuccessful web call")
actual = mwparserfromhell.parse(text)
self.assertEqual(expected, actual)

if __name__ == "__main__":
unittest.main(verbosity=2)

+ 0
- 119
tests/test_parameter.py View File

@@ -1,119 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import unittest

from mwparserfromhell.parameter import Parameter
from mwparserfromhell.template import Template

class TestParameter(unittest.TestCase):
def setUp(self):
self.name = "foo"
self.value1 = "bar"
self.value2 = "{{spam}}"
self.value3 = "bar{{spam}}"
self.value4 = "embedded {{eggs|spam|baz=buz}} {{goes}} here"
self.templates2 = [Template("spam")]
self.templates3 = [Template("spam")]
self.templates4 = [Template("eggs", [Parameter("1", "spam"),
Parameter("baz", "buz")]),
Template("goes")]

def test_construct(self):
Parameter(self.name, self.value1)
Parameter(self.name, self.value2, self.templates2)
Parameter(name=self.name, value=self.value3)
Parameter(name=self.name, value=self.value4, templates=self.templates4)

def test_name(self):
params = [
Parameter(self.name, self.value1),
Parameter(self.name, self.value2, self.templates2),
Parameter(name=self.name, value=self.value3),
Parameter(name=self.name, value=self.value4,
templates=self.templates4)
]
for param in params:
self.assertEqual(param.name, self.name)

def test_value(self):
tests = [
(Parameter(self.name, self.value1), self.value1),
(Parameter(self.name, self.value2, self.templates2), self.value2),
(Parameter(name=self.name, value=self.value3), self.value3),
(Parameter(name=self.name, value=self.value4,
templates=self.templates4), self.value4)
]
for param, correct in tests:
self.assertEqual(param.value, correct)

def test_templates(self):
tests = [
(Parameter(self.name, self.value3, self.templates3),
self.templates3),
(Parameter(name=self.name, value=self.value4,
templates=self.templates4), self.templates4)
]
for param, correct in tests:
self.assertEqual(param.templates, correct)

def test_magic(self):
params = [Parameter(self.name, self.value1),
Parameter(self.name, self.value2, self.templates2),
Parameter(self.name, self.value3, self.templates3),
Parameter(self.name, self.value4, self.templates4)]
for param in params:
self.assertEqual(repr(param), repr(param.value))
self.assertEqual(str(param), str(param.value))
self.assertIs(param < "eggs", param.value < "eggs")
self.assertIs(param <= "bar{{spam}}", param.value <= "bar{{spam}}")
self.assertIs(param == "bar", param.value == "bar")
self.assertIs(param != "bar", param.value != "bar")
self.assertIs(param > "eggs", param.value > "eggs")
self.assertIs(param >= "bar{{spam}}", param.value >= "bar{{spam}}")
self.assertEquals(bool(param), bool(param.value))
self.assertEquals(len(param), len(param.value))
self.assertEquals(list(param), list(param.value))
self.assertEquals(param[2], param.value[2])
self.assertEquals(list(reversed(param)),
list(reversed(param.value)))
self.assertIs("bar" in param, "bar" in param.value)
self.assertEquals(param + "test", param.value + "test")
self.assertEquals("test" + param, "test" + param.value)
# add param
# add template left
# add template right

self.assertEquals(param * 3, Parameter(param.name, param.value * 3,
param.templates * 3))
self.assertEquals(3 * param, Parameter(param.name, 3 * param.value,
3 * param.templates))

# add param inplace
# add template implace
# add str inplace
# multiply int inplace
self.assertIsInstance(param, Parameter)
self.assertIsInstance(param.value, str)

if __name__ == "__main__":
unittest.main(verbosity=2)

+ 41
- 35
tests/test_parser.py View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -20,44 +20,50 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from __future__ import unicode_literals
import unittest

from mwparserfromhell.parameter import Parameter
from mwparserfromhell.parser import Parser
from mwparserfromhell.template import Template
from mwparserfromhell import parser
from mwparserfromhell.nodes import Template, Text, Wikilink
from mwparserfromhell.nodes.extras import Parameter
from mwparserfromhell.smart_list import SmartList
from mwparserfromhell.wikicode import Wikicode

TESTS = [
("", []),
("abcdef ghijhk", []),
("abc{this is not a template}def", []),
("neither is {{this one}nor} {this one {despite}} containing braces", []),
("this is an acceptable {{template}}", [Template("template")]),
("{{multiple}}{{templates}}", [Template("multiple"),
Template("templates")]),
("multiple {{-}} templates {{+}}!", [Template("-"), Template("+")]),
("{{{no templates here}}}", []),
("{ {{templates here}}}", [Template("templates here")]),
("{{{{I do not exist}}}}", []),
("{{foo|bar|baz|eggs=spam}}",
[Template("foo", [Parameter("1", "bar"), Parameter("2", "baz"),
Parameter("eggs", "spam")])]),
("{{abc def|ghi|jk=lmno|pqr|st=uv|wx|yz}}",
[Template("abc def", [Parameter("1", "ghi"), Parameter("jk", "lmno"),
Parameter("2", "pqr"), Parameter("st", "uv"),
Parameter("3", "wx"), Parameter("4", "yz")])]),
("{{this has a|{{template}}|inside of it}}",
[Template("this has a", [Parameter("1", "{{template}}",
[Template("template")]),
Parameter("2", "inside of it")])]),
("{{{{I exist}} }}", [Template("I exist", [] )]),
("{{}}")
]
from ._test_tree_equality import TreeEqualityTestCase
from .compat import range

class TestParser(unittest.TestCase):
def test_parse(self):
parser = Parser()
for unparsed, parsed in TESTS:
self.assertEqual(parser.parse(unparsed), parsed)
class TestParser(TreeEqualityTestCase):
"""Tests for the Parser class itself, which tokenizes and builds nodes."""

def test_use_c(self):
"""make sure the correct tokenizer is used"""
if parser.use_c:
self.assertTrue(parser.Parser(None)._tokenizer.USES_C)
parser.use_c = False
self.assertFalse(parser.Parser(None)._tokenizer.USES_C)

def test_parsing(self):
"""integration test for parsing overall"""
text = "this is text; {{this|is=a|template={{with|[[links]]|in}}it}}"
wrap = lambda L: Wikicode(SmartList(L))
expected = wrap([
Text("this is text; "),
Template(wrap([Text("this")]), [
Parameter(wrap([Text("is")]), wrap([Text("a")])),
Parameter(wrap([Text("template")]), wrap([
Template(wrap([Text("with")]), [
Parameter(wrap([Text("1")]),
wrap([Wikilink(wrap([Text("links")]))]),
showkey=False),
Parameter(wrap([Text("2")]),
wrap([Text("in")]), showkey=False)
]),
Text("it")
]))
])
])
actual = parser.Parser(text).parse()
self.assertWikicodeEqual(expected, actual)

if __name__ == "__main__":
unittest.main(verbosity=2)

+ 43
- 0
tests/test_pytokenizer.py View File

@@ -0,0 +1,43 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from __future__ import unicode_literals
import unittest

from mwparserfromhell.parser.tokenizer import Tokenizer

from ._test_tokenizer import TokenizerTestCase

class TestPyTokenizer(TokenizerTestCase, unittest.TestCase):
"""Test cases for the Python tokenizer."""

@classmethod
def setUpClass(cls):
cls.tokenizer = Tokenizer

def test_uses_c(self):
"""make sure the Python tokenizer identifies as not using C"""
self.assertFalse(Tokenizer.USES_C)
self.assertFalse(Tokenizer().USES_C)

if __name__ == "__main__":
unittest.main(verbosity=2)

+ 392
- 0
tests/test_smart_list.py View File

@@ -0,0 +1,392 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from __future__ import unicode_literals
import unittest

from mwparserfromhell.compat import py3k
from mwparserfromhell.smart_list import SmartList, _ListProxy

from .compat import range

class TestSmartList(unittest.TestCase):
"""Test cases for the SmartList class and its child, _ListProxy."""

def _test_get_set_del_item(self, builder):
"""Run tests on __get/set/delitem__ of a list built with *builder*."""
def assign(L, s1, s2, s3, val):
L[s1:s2:s3] = val
def delete(L, s1):
del L[s1]

list1 = builder([0, 1, 2, 3, "one", "two"])
list2 = builder(list(range(10)))

self.assertEqual(1, list1[1])
self.assertEqual("one", list1[-2])
self.assertEqual([2, 3], list1[2:4])
self.assertRaises(IndexError, lambda: list1[6])
self.assertRaises(IndexError, lambda: list1[-7])

self.assertEqual([0, 1, 2], list1[:3])
self.assertEqual([0, 1, 2, 3, "one", "two"], list1[:])
self.assertEqual([3, "one", "two"], list1[3:])
self.assertEqual(["one", "two"], list1[-2:])
self.assertEqual([0, 1], list1[:-4])
self.assertEqual([], list1[6:])
self.assertEqual([], list1[4:2])

self.assertEqual([0, 2, "one"], list1[0:5:2])
self.assertEqual([0, 2], list1[0:-3:2])
self.assertEqual([0, 1, 2, 3, "one", "two"], list1[::])
self.assertEqual([2, 3, "one", "two"], list1[2::])
self.assertEqual([0, 1, 2, 3], list1[:4:])
self.assertEqual([2, 3], list1[2:4:])
self.assertEqual([0, 2, 4, 6, 8], list2[::2])
self.assertEqual([2, 5, 8], list2[2::3])
self.assertEqual([0, 3], list2[:6:3])
self.assertEqual([2, 5, 8], list2[-8:9:3])
self.assertEqual([], list2[100000:1000:-100])

list1[3] = 100
self.assertEqual(100, list1[3])
list1[-3] = 101
self.assertEqual([0, 1, 2, 101, "one", "two"], list1)
list1[5:] = [6, 7, 8]
self.assertEqual([6, 7, 8], list1[5:])
self.assertEqual([0, 1, 2, 101, "one", 6, 7, 8], list1)
list1[2:4] = [-1, -2, -3, -4, -5]
self.assertEqual([0, 1, -1, -2, -3, -4, -5, "one", 6, 7, 8], list1)
list1[0:-3] = [99]
self.assertEqual([99, 6, 7, 8], list1)
list2[0:6:2] = [100, 102, 104]
self.assertEqual([100, 1, 102, 3, 104, 5, 6, 7, 8, 9], list2)
list2[::3] = [200, 203, 206, 209]
self.assertEqual([200, 1, 102, 203, 104, 5, 206, 7, 8, 209], list2)
list2[::] = range(7)
self.assertEqual([0, 1, 2, 3, 4, 5, 6], list2)
self.assertRaises(ValueError, assign, list2, 0, 5, 2,
[100, 102, 104, 106])

del list2[2]
self.assertEqual([0, 1, 3, 4, 5, 6], list2)
del list2[-3]
self.assertEqual([0, 1, 3, 5, 6], list2)
self.assertRaises(IndexError, delete, list2, 100)
self.assertRaises(IndexError, delete, list2, -6)
list2[:] = range(10)
del list2[3:6]
self.assertEqual([0, 1, 2, 6, 7, 8, 9], list2)
del list2[-2:]
self.assertEqual([0, 1, 2, 6, 7], list2)
del list2[:2]
self.assertEqual([2, 6, 7], list2)
list2[:] = range(10)
del list2[2:8:2]
self.assertEqual([0, 1, 3, 5, 7, 8, 9], list2)

def _test_add_radd_iadd(self, builder):
"""Run tests on __r/i/add__ of a list built with *builder*."""
list1 = builder(range(5))
list2 = builder(range(5, 10))
self.assertEqual([0, 1, 2, 3, 4, 5, 6], list1 + [5, 6])
self.assertEqual([0, 1, 2, 3, 4], list1)
self.assertEqual(list(range(10)), list1 + list2)
self.assertEqual([-2, -1, 0, 1, 2, 3, 4], [-2, -1] + list1)
self.assertEqual([0, 1, 2, 3, 4], list1)
list1 += ["foo", "bar", "baz"]
self.assertEqual([0, 1, 2, 3, 4, "foo", "bar", "baz"], list1)

def _test_other_magic_methods(self, builder):
"""Run tests on other magic methods of a list built with *builder*."""
list1 = builder([0, 1, 2, 3, "one", "two"])
list2 = builder([])
list3 = builder([0, 2, 3, 4])
list4 = builder([0, 1, 2])

if py3k:
self.assertEqual("[0, 1, 2, 3, 'one', 'two']", str(list1))
self.assertEqual(b"\x00\x01\x02", bytes(list4))
self.assertEqual("[0, 1, 2, 3, 'one', 'two']", repr(list1))
else:
self.assertEqual("[0, 1, 2, 3, u'one', u'two']", unicode(list1))
self.assertEqual(b"[0, 1, 2, 3, u'one', u'two']", str(list1))
self.assertEqual(b"[0, 1, 2, 3, u'one', u'two']", repr(list1))

self.assertTrue(list1 < list3)
self.assertTrue(list1 <= list3)
self.assertFalse(list1 == list3)
self.assertTrue(list1 != list3)
self.assertFalse(list1 > list3)
self.assertFalse(list1 >= list3)

other1 = [0, 2, 3, 4]
self.assertTrue(list1 < other1)
self.assertTrue(list1 <= other1)
self.assertFalse(list1 == other1)
self.assertTrue(list1 != other1)
self.assertFalse(list1 > other1)
self.assertFalse(list1 >= other1)

other2 = [0, 0, 1, 2]
self.assertFalse(list1 < other2)
self.assertFalse(list1 <= other2)
self.assertFalse(list1 == other2)
self.assertTrue(list1 != other2)
self.assertTrue(list1 > other2)
self.assertTrue(list1 >= other2)

other3 = [0, 1, 2, 3, "one", "two"]
self.assertFalse(list1 < other3)
self.assertTrue(list1 <= other3)
self.assertTrue(list1 == other3)
self.assertFalse(list1 != other3)
self.assertFalse(list1 > other3)
self.assertTrue(list1 >= other3)

self.assertTrue(bool(list1))
self.assertFalse(bool(list2))

self.assertEqual(6, len(list1))
self.assertEqual(0, len(list2))

out = []
for obj in list1:
out.append(obj)
self.assertEqual([0, 1, 2, 3, "one", "two"], out)

out = []
for ch in list2:
out.append(ch)
self.assertEqual([], out)

gen1 = iter(list1)
out = []
for i in range(len(list1)):
out.append(next(gen1))
self.assertRaises(StopIteration, next, gen1)
self.assertEqual([0, 1, 2, 3, "one", "two"], out)
gen2 = iter(list2)
self.assertRaises(StopIteration, next, gen2)

self.assertEqual(["two", "one", 3, 2, 1, 0], list(reversed(list1)))
self.assertEqual([], list(reversed(list2)))

self.assertTrue("one" in list1)
self.assertTrue(3 in list1)
self.assertFalse(10 in list1)
self.assertFalse(0 in list2)

self.assertEqual([], list2 * 5)
self.assertEqual([], 5 * list2)
self.assertEqual([0, 1, 2, 0, 1, 2, 0, 1, 2], list4 * 3)
self.assertEqual([0, 1, 2, 0, 1, 2, 0, 1, 2], 3 * list4)
list4 *= 2
self.assertEqual([0, 1, 2, 0, 1, 2], list4)

def _test_list_methods(self, builder):
"""Run tests on the public methods of a list built with *builder*."""
list1 = builder(range(5))
list2 = builder(["foo"])
list3 = builder([("a", 5), ("d", 2), ("b", 8), ("c", 3)])

list1.append(5)
list1.append(1)
list1.append(2)
self.assertEqual([0, 1, 2, 3, 4, 5, 1, 2], list1)

self.assertEqual(0, list1.count(6))
self.assertEqual(2, list1.count(1))

list1.extend(range(5, 8))
self.assertEqual([0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1)

self.assertEqual(1, list1.index(1))
self.assertEqual(6, list1.index(1, 3))
self.assertEqual(6, list1.index(1, 3, 7))
self.assertRaises(ValueError, list1.index, 1, 3, 5)

list1.insert(0, -1)
self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1)
list1.insert(-1, 6.5)
self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7], list1)
list1.insert(13, 8)
self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7, 8], list1)

self.assertEqual(8, list1.pop())
self.assertEqual(7, list1.pop())
self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5], list1)
self.assertEqual(-1, list1.pop(0))
self.assertEqual(5, list1.pop(5))
self.assertEqual(6.5, list1.pop(-1))
self.assertEqual([0, 1, 2, 3, 4, 1, 2, 5, 6], list1)
self.assertEqual("foo", list2.pop())
self.assertRaises(IndexError, list2.pop)
self.assertEqual([], list2)

list1.remove(6)
self.assertEqual([0, 1, 2, 3, 4, 1, 2, 5], list1)
list1.remove(1)
self.assertEqual([0, 2, 3, 4, 1, 2, 5], list1)
list1.remove(1)
self.assertEqual([0, 2, 3, 4, 2, 5], list1)
self.assertRaises(ValueError, list1.remove, 1)

list1.reverse()
self.assertEqual([5, 2, 4, 3, 2, 0], list1)

list1.sort()
self.assertEqual([0, 2, 2, 3, 4, 5], list1)
list1.sort(reverse=True)
self.assertEqual([5, 4, 3, 2, 2, 0], list1)
if not py3k:
func = lambda x, y: abs(3 - x) - abs(3 - y) # Distance from 3
list1.sort(cmp=func)
self.assertEqual([3, 4, 2, 2, 5, 0], list1)
list1.sort(cmp=func, reverse=True)
self.assertEqual([0, 5, 4, 2, 2, 3], list1)
list3.sort(key=lambda i: i[1])
self.assertEqual([("d", 2), ("c", 3), ("a", 5), ("b", 8)], list3)
list3.sort(key=lambda i: i[1], reverse=True)
self.assertEqual([("b", 8), ("a", 5), ("c", 3), ("d", 2)], list3)

def test_docs(self):
"""make sure the methods of SmartList/_ListProxy have docstrings"""
methods = ["append", "count", "extend", "index", "insert", "pop",
"remove", "reverse", "sort"]
for meth in methods:
expected = getattr(list, meth).__doc__
smartlist_doc = getattr(SmartList, meth).__doc__
listproxy_doc = getattr(_ListProxy, meth).__doc__
self.assertEqual(expected, smartlist_doc)
self.assertEqual(expected, listproxy_doc)

def test_doctest(self):
"""make sure the test embedded in SmartList's docstring passes"""
parent = SmartList([0, 1, 2, 3])
self.assertEqual([0, 1, 2, 3], parent)
child = parent[2:]
self.assertEqual([2, 3], child)
child.append(4)
self.assertEqual([2, 3, 4], child)
self.assertEqual([0, 1, 2, 3, 4], parent)

def test_parent_get_set_del(self):
"""make sure SmartList's getitem/setitem/delitem work"""
self._test_get_set_del_item(SmartList)

def test_parent_add(self):
"""make sure SmartList's add/radd/iadd work"""
self._test_add_radd_iadd(SmartList)

def test_parent_unaffected_magics(self):
"""sanity checks against SmartList features that were not modified"""
self._test_other_magic_methods(SmartList)

def test_parent_methods(self):
"""make sure SmartList's non-magic methods work, like append()"""
self._test_list_methods(SmartList)

def test_child_get_set_del(self):
"""make sure _ListProxy's getitem/setitem/delitem work"""
self._test_get_set_del_item(lambda L: SmartList(list(L))[:])
self._test_get_set_del_item(lambda L: SmartList([999] + list(L))[1:])
self._test_get_set_del_item(lambda L: SmartList(list(L) + [999])[:-1])
builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2]
self._test_get_set_del_item(builder)

def test_child_add(self):
"""make sure _ListProxy's add/radd/iadd work"""
self._test_add_radd_iadd(lambda L: SmartList(list(L))[:])
self._test_add_radd_iadd(lambda L: SmartList([999] + list(L))[1:])
self._test_add_radd_iadd(lambda L: SmartList(list(L) + [999])[:-1])
builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2]
self._test_add_radd_iadd(builder)

def test_child_other_magics(self):
"""make sure _ListProxy's other magically implemented features work"""
self._test_other_magic_methods(lambda L: SmartList(list(L))[:])
self._test_other_magic_methods(lambda L: SmartList([999] + list(L))[1:])
self._test_other_magic_methods(lambda L: SmartList(list(L) + [999])[:-1])
builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2]
self._test_other_magic_methods(builder)

def test_child_methods(self):
"""make sure _ListProxy's non-magic methods work, like append()"""
self._test_list_methods(lambda L: SmartList(list(L))[:])
self._test_list_methods(lambda L: SmartList([999] + list(L))[1:])
self._test_list_methods(lambda L: SmartList(list(L) + [999])[:-1])
builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2]
self._test_list_methods(builder)

def test_influence(self):
"""make sure changes are propagated from parents to children"""
parent = SmartList([0, 1, 2, 3, 4, 5])
child1 = parent[2:]
child2 = parent[2:5]

parent.append(6)
child1.append(7)
child2.append(4.5)
self.assertEqual([0, 1, 2, 3, 4, 4.5, 5, 6, 7], parent)
self.assertEqual([2, 3, 4, 4.5, 5, 6, 7], child1)
self.assertEqual([2, 3, 4, 4.5], child2)

parent.insert(0, -1)
parent.insert(4, 2.5)
parent.insert(10, 6.5)
self.assertEqual([-1, 0, 1, 2, 2.5, 3, 4, 4.5, 5, 6, 6.5, 7], parent)
self.assertEqual([2, 2.5, 3, 4, 4.5, 5, 6, 6.5, 7], child1)
self.assertEqual([2, 2.5, 3, 4, 4.5], child2)

self.assertEqual(7, parent.pop())
self.assertEqual(6.5, child1.pop())
self.assertEqual(4.5, child2.pop())
self.assertEqual([-1, 0, 1, 2, 2.5, 3, 4, 5, 6], parent)
self.assertEqual([2, 2.5, 3, 4, 5, 6], child1)
self.assertEqual([2, 2.5, 3, 4], child2)

parent.remove(-1)
child1.remove(2.5)
self.assertEqual([0, 1, 2, 3, 4, 5, 6], parent)
self.assertEqual([2, 3, 4, 5, 6], child1)
self.assertEqual([2, 3, 4], child2)

self.assertEqual(0, parent.pop(0))
self.assertEqual([1, 2, 3, 4, 5, 6], parent)
self.assertEqual([2, 3, 4, 5, 6], child1)
self.assertEqual([2, 3, 4], child2)

child2.reverse()
self.assertEqual([1, 4, 3, 2, 5, 6], parent)
self.assertEqual([4, 3, 2, 5, 6], child1)
self.assertEqual([4, 3, 2], child2)

parent.extend([7, 8])
child1.extend([8.1, 8.2])
child2.extend([1.9, 1.8])
self.assertEqual([1, 4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], parent)
self.assertEqual([4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], child1)
self.assertEqual([4, 3, 2, 1.9, 1.8], child2)

if __name__ == "__main__":
unittest.main(verbosity=2)

+ 435
- 0
tests/test_string_mixin.py View File

@@ -0,0 +1,435 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from __future__ import unicode_literals
from sys import getdefaultencoding
from types import GeneratorType
import unittest

from mwparserfromhell.compat import bytes, py3k, str
from mwparserfromhell.string_mixin import StringMixIn

from .compat import range

class _FakeString(StringMixIn):
def __init__(self, data):
self._data = data

def __unicode__(self):
return self._data


class TestStringMixIn(unittest.TestCase):
"""Test cases for the StringMixIn class."""

def test_docs(self):
"""make sure the various methods of StringMixIn have docstrings"""
methods = [
"capitalize", "center", "count", "encode", "endswith",
"expandtabs", "find", "format", "index", "isalnum", "isalpha",
"isdecimal", "isdigit", "islower", "isnumeric", "isspace",
"istitle", "isupper", "join", "ljust", "lower", "lstrip",
"partition", "replace", "rfind", "rindex", "rjust", "rpartition",
"rsplit", "rstrip", "split", "splitlines", "startswith", "strip",
"swapcase", "title", "translate", "upper", "zfill"]
if py3k:
methods.extend(["casefold", "format_map", "isidentifier",
"isprintable", "maketrans"])
else:
methods.append("decode")
for meth in methods:
expected = getattr(str, meth).__doc__
actual = getattr(StringMixIn, meth).__doc__
self.assertEqual(expected, actual)

def test_types(self):
"""make sure StringMixIns convert to different types correctly"""
fstr = _FakeString("fake string")
self.assertEqual(str(fstr), "fake string")
self.assertEqual(bytes(fstr), b"fake string")
if py3k:
self.assertEqual(repr(fstr), "'fake string'")
else:
self.assertEqual(repr(fstr), b"u'fake string'")

self.assertIsInstance(str(fstr), str)
self.assertIsInstance(bytes(fstr), bytes)
if py3k:
self.assertIsInstance(repr(fstr), str)
else:
self.assertIsInstance(repr(fstr), bytes)

def test_comparisons(self):
"""make sure comparison operators work"""
str1 = _FakeString("this is a fake string")
str2 = _FakeString("this is a fake string")
str3 = _FakeString("fake string, this is")
str4 = "this is a fake string"
str5 = "fake string, this is"

self.assertFalse(str1 > str2)
self.assertTrue(str1 >= str2)
self.assertTrue(str1 == str2)
self.assertFalse(str1 != str2)
self.assertFalse(str1 < str2)
self.assertTrue(str1 <= str2)

self.assertTrue(str1 > str3)
self.assertTrue(str1 >= str3)
self.assertFalse(str1 == str3)
self.assertTrue(str1 != str3)
self.assertFalse(str1 < str3)
self.assertFalse(str1 <= str3)

self.assertFalse(str1 > str4)
self.assertTrue(str1 >= str4)
self.assertTrue(str1 == str4)
self.assertFalse(str1 != str4)
self.assertFalse(str1 < str4)
self.assertTrue(str1 <= str4)

self.assertTrue(str1 > str5)
self.assertTrue(str1 >= str5)
self.assertFalse(str1 == str5)
self.assertTrue(str1 != str5)
self.assertFalse(str1 < str5)
self.assertFalse(str1 <= str5)

def test_other_magics(self):
"""test other magically implemented features, like len() and iter()"""
str1 = _FakeString("fake string")
str2 = _FakeString("")
expected = ["f", "a", "k", "e", " ", "s", "t", "r", "i", "n", "g"]

self.assertTrue(str1)
self.assertFalse(str2)
self.assertEqual(11, len(str1))
self.assertEqual(0, len(str2))

out = []
for ch in str1:
out.append(ch)
self.assertEqual(expected, out)

out = []
for ch in str2:
out.append(ch)
self.assertEqual([], out)

gen1 = iter(str1)
gen2 = iter(str2)
self.assertIsInstance(gen1, GeneratorType)
self.assertIsInstance(gen2, GeneratorType)

out = []
for i in range(len(str1)):
out.append(next(gen1))
self.assertRaises(StopIteration, next, gen1)
self.assertEqual(expected, out)
self.assertRaises(StopIteration, next, gen2)

self.assertEqual("gnirts ekaf", "".join(list(reversed(str1))))
self.assertEqual([], list(reversed(str2)))

self.assertEqual("f", str1[0])
self.assertEqual(" ", str1[4])
self.assertEqual("g", str1[10])
self.assertEqual("n", str1[-2])
self.assertRaises(IndexError, lambda: str1[11])
self.assertRaises(IndexError, lambda: str2[0])

self.assertTrue("k" in str1)
self.assertTrue("fake" in str1)
self.assertTrue("str" in str1)
self.assertTrue("" in str1)
self.assertTrue("" in str2)
self.assertFalse("real" in str1)
self.assertFalse("s" in str2)

def test_other_methods(self):
"""test the remaining non-magic methods of StringMixIn"""
str1 = _FakeString("fake string")
self.assertEqual("Fake string", str1.capitalize())

self.assertEqual(" fake string ", str1.center(15))
self.assertEqual(" fake string ", str1.center(16))
self.assertEqual("qqfake stringqq", str1.center(15, "q"))

self.assertEqual(1, str1.count("e"))
self.assertEqual(0, str1.count("z"))
self.assertEqual(1, str1.count("r", 7))
self.assertEqual(0, str1.count("r", 8))
self.assertEqual(1, str1.count("r", 5, 9))
self.assertEqual(0, str1.count("r", 5, 7))

if not py3k:
str2 = _FakeString("fo")
self.assertEqual(str1, str1.decode())
actual = _FakeString("\\U00010332\\U0001033f\\U00010344")
self.assertEqual("𐌲𐌿𐍄", actual.decode("unicode_escape"))
self.assertRaises(UnicodeError, str2.decode, "punycode")
self.assertEqual("", str2.decode("punycode", "ignore"))

str3 = _FakeString("𐌲𐌿𐍄")
actual = b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84"
self.assertEqual(b"fake string", str1.encode())
self.assertEqual(actual, str3.encode("utf-8"))
self.assertEqual(actual, str3.encode(encoding="utf-8"))
if getdefaultencoding() == "ascii":
self.assertRaises(UnicodeEncodeError, str3.encode)
elif getdefaultencoding() == "utf-8":
self.assertEqual(actual, str3.encode())
self.assertRaises(UnicodeEncodeError, str3.encode, "ascii")
self.assertRaises(UnicodeEncodeError, str3.encode, "ascii", "strict")
if getdefaultencoding() == "ascii":
self.assertRaises(UnicodeEncodeError, str3.encode, errors="strict")
elif getdefaultencoding() == "utf-8":
self.assertEqual(actual, str3.encode(errors="strict"))
self.assertEqual(b"", str3.encode("ascii", "ignore"))
if getdefaultencoding() == "ascii":
self.assertEqual(b"", str3.encode(errors="ignore"))
elif getdefaultencoding() == "utf-8":
self.assertEqual(actual, str3.encode(errors="ignore"))

self.assertTrue(str1.endswith("ing"))
self.assertFalse(str1.endswith("ingh"))

str4 = _FakeString("\tfoobar")
self.assertEqual("fake string", str1)
self.assertEqual(" foobar", str4.expandtabs())
self.assertEqual(" foobar", str4.expandtabs(4))

self.assertEqual(3, str1.find("e"))
self.assertEqual(-1, str1.find("z"))
self.assertEqual(7, str1.find("r", 7))
self.assertEqual(-1, str1.find("r", 8))
self.assertEqual(7, str1.find("r", 5, 9))
self.assertEqual(-1, str1.find("r", 5, 7))

str5 = _FakeString("foo{0}baz")
str6 = _FakeString("foo{abc}baz")
str7 = _FakeString("foo{0}{abc}buzz")
str8 = _FakeString("{0}{1}")
self.assertEqual("fake string", str1.format())
self.assertEqual("foobarbaz", str5.format("bar"))
self.assertEqual("foobarbaz", str6.format(abc="bar"))
self.assertEqual("foobarbazbuzz", str7.format("bar", abc="baz"))
self.assertRaises(IndexError, str8.format, "abc")

if py3k:
self.assertEqual("fake string", str1.format_map({}))
self.assertEqual("foobarbaz", str6.format_map({"abc": "bar"}))
self.assertRaises(ValueError, str5.format_map, {0: "abc"})

self.assertEqual(3, str1.index("e"))
self.assertRaises(ValueError, str1.index, "z")
self.assertEqual(7, str1.index("r", 7))
self.assertRaises(ValueError, str1.index, "r", 8)
self.assertEqual(7, str1.index("r", 5, 9))
self.assertRaises(ValueError, str1.index, "r", 5, 7)

str9 = _FakeString("foobar")
str10 = _FakeString("foobar123")
str11 = _FakeString("foo bar")
self.assertTrue(str9.isalnum())
self.assertTrue(str10.isalnum())
self.assertFalse(str11.isalnum())

self.assertTrue(str9.isalpha())
self.assertFalse(str10.isalpha())
self.assertFalse(str11.isalpha())

str12 = _FakeString("123")
str13 = _FakeString("\u2155")
str14 = _FakeString("\u00B2")
self.assertFalse(str9.isdecimal())
self.assertTrue(str12.isdecimal())
self.assertFalse(str13.isdecimal())
self.assertFalse(str14.isdecimal())

self.assertFalse(str9.isdigit())
self.assertTrue(str12.isdigit())
self.assertFalse(str13.isdigit())
self.assertTrue(str14.isdigit())

if py3k:
self.assertTrue(str9.isidentifier())
self.assertTrue(str10.isidentifier())
self.assertFalse(str11.isidentifier())
self.assertFalse(str12.isidentifier())

str15 = _FakeString("")
str16 = _FakeString("FooBar")
self.assertTrue(str9.islower())
self.assertFalse(str15.islower())
self.assertFalse(str16.islower())

self.assertFalse(str9.isnumeric())
self.assertTrue(str12.isnumeric())
self.assertTrue(str13.isnumeric())
self.assertTrue(str14.isnumeric())

if py3k:
str16B = _FakeString("\x01\x02")
self.assertTrue(str9.isprintable())
self.assertTrue(str13.isprintable())
self.assertTrue(str14.isprintable())
self.assertTrue(str15.isprintable())
self.assertFalse(str16B.isprintable())

str17 = _FakeString(" ")
str18 = _FakeString("\t \t \r\n")
self.assertFalse(str1.isspace())
self.assertFalse(str9.isspace())
self.assertTrue(str17.isspace())
self.assertTrue(str18.isspace())

str19 = _FakeString("This Sentence Looks Like A Title")
str20 = _FakeString("This sentence doesn't LookLikeATitle")
self.assertFalse(str15.istitle())
self.assertTrue(str19.istitle())
self.assertFalse(str20.istitle())

str21 = _FakeString("FOOBAR")
self.assertFalse(str9.isupper())
self.assertFalse(str15.isupper())
self.assertTrue(str21.isupper())

self.assertEqual("foobar", str15.join(["foo", "bar"]))
self.assertEqual("foo123bar123baz", str12.join(("foo", "bar", "baz")))

self.assertEqual("fake string ", str1.ljust(15))
self.assertEqual("fake string ", str1.ljust(16))
self.assertEqual("fake stringqqqq", str1.ljust(15, "q"))

str22 = _FakeString("ß")
self.assertEqual("", str15.lower())
self.assertEqual("foobar", str16.lower())
self.assertEqual("ß", str22.lower())
if py3k:
self.assertEqual("", str15.casefold())
self.assertEqual("foobar", str16.casefold())
self.assertEqual("ss", str22.casefold())

str23 = _FakeString(" fake string ")
self.assertEqual("fake string", str1.lstrip())
self.assertEqual("fake string ", str23.lstrip())
self.assertEqual("ke string", str1.lstrip("abcdef"))

self.assertEqual(("fa", "ke", " string"), str1.partition("ke"))
self.assertEqual(("fake string", "", ""), str1.partition("asdf"))

str24 = _FakeString("boo foo moo")
self.assertEqual("real string", str1.replace("fake", "real"))
self.assertEqual("bu fu moo", str24.replace("oo", "u", 2))

self.assertEqual(3, str1.rfind("e"))
self.assertEqual(-1, str1.rfind("z"))
self.assertEqual(7, str1.rfind("r", 7))
self.assertEqual(-1, str1.rfind("r", 8))
self.assertEqual(7, str1.rfind("r", 5, 9))
self.assertEqual(-1, str1.rfind("r", 5, 7))

self.assertEqual(3, str1.rindex("e"))
self.assertRaises(ValueError, str1.rindex, "z")
self.assertEqual(7, str1.rindex("r", 7))
self.assertRaises(ValueError, str1.rindex, "r", 8)
self.assertEqual(7, str1.rindex("r", 5, 9))
self.assertRaises(ValueError, str1.rindex, "r", 5, 7)

self.assertEqual(" fake string", str1.rjust(15))
self.assertEqual(" fake string", str1.rjust(16))
self.assertEqual("qqqqfake string", str1.rjust(15, "q"))

self.assertEqual(("fa", "ke", " string"), str1.rpartition("ke"))
self.assertEqual(("", "", "fake string"), str1.rpartition("asdf"))

str25 = _FakeString(" this is a sentence with whitespace ")
actual = ["this", "is", "a", "sentence", "with", "whitespace"]
self.assertEqual(actual, str25.rsplit())
self.assertEqual(actual, str25.rsplit(None))
actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with",
"", "whitespace", ""]
self.assertEqual(actual, str25.rsplit(" "))
actual = [" this is a", "sentence", "with", "whitespace"]
self.assertEqual(actual, str25.rsplit(None, 3))
actual = [" this is a sentence with", "", "whitespace", ""]
self.assertEqual(actual, str25.rsplit(" ", 3))
if py3k:
actual = [" this is a", "sentence", "with", "whitespace"]
self.assertEqual(actual, str25.rsplit(maxsplit=3))

self.assertEqual("fake string", str1.rstrip())
self.assertEqual(" fake string", str23.rstrip())
self.assertEqual("fake stri", str1.rstrip("ngr"))

actual = ["this", "is", "a", "sentence", "with", "whitespace"]
self.assertEqual(actual, str25.split())
self.assertEqual(actual, str25.split(None))
actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with",
"", "whitespace", ""]
self.assertEqual(actual, str25.split(" "))
actual = ["this", "is", "a", "sentence with whitespace "]
self.assertEqual(actual, str25.split(None, 3))
actual = ["", "", "", "this is a sentence with whitespace "]
self.assertEqual(actual, str25.split(" ", 3))
if py3k:
actual = ["this", "is", "a", "sentence with whitespace "]
self.assertEqual(actual, str25.split(maxsplit=3))

str26 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere")
self.assertEqual(["lines", "of", "text", "are", "presented", "here"],
str26.splitlines())
self.assertEqual(["lines\n", "of\n", "text\r\n", "are\r\n",
"presented\n", "here"], str26.splitlines(True))

self.assertTrue(str1.startswith("fake"))
self.assertFalse(str1.startswith("faker"))

self.assertEqual("fake string", str1.strip())
self.assertEqual("fake string", str23.strip())
self.assertEqual("ke stri", str1.strip("abcdefngr"))

self.assertEqual("fOObAR", str16.swapcase())

self.assertEqual("Fake String", str1.title())

if py3k:
table1 = str.maketrans({97: "1", 101: "2", 105: "3", 111: "4",
117: "5"})
table2 = str.maketrans("aeiou", "12345")
table3 = str.maketrans("aeiou", "12345", "rts")
self.assertEqual("f1k2 str3ng", str1.translate(table1))
self.assertEqual("f1k2 str3ng", str1.translate(table2))
self.assertEqual("f1k2 3ng", str1.translate(table3))
else:
table = {97: "1", 101: "2", 105: "3", 111: "4", 117: "5"}
self.assertEqual("f1k2 str3ng", str1.translate(table))

self.assertEqual("", str15.upper())
self.assertEqual("FOOBAR", str16.upper())

self.assertEqual("123", str12.zfill(3))
self.assertEqual("000123", str12.zfill(6))

if __name__ == "__main__":
unittest.main(verbosity=2)

+ 0
- 106
tests/test_template.py View File

@@ -1,106 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from itertools import permutations
import unittest

from mwparserfromhell.parameter import Parameter
from mwparserfromhell.template import Template

class TestTemplate(unittest.TestCase):
def setUp(self):
self.name = "foo"
self.bar = Parameter("1", "bar")
self.baz = Parameter("2", "baz")
self.eggs = Parameter("eggs", "spam")
self.params = [self.bar, self.baz, self.eggs]

def test_construct(self):
Template(self.name)
Template(self.name, self.params)
Template(name=self.name)
Template(name=self.name, params=self.params)

def test_name(self):
templates = [
Template(self.name),
Template(self.name, self.params),
Template(name=self.name),
Template(name=self.name, params=self.params)
]
for template in templates:
self.assertEqual(template.name, self.name)

def test_params(self):
for template in (Template(self.name), Template(name=self.name)):
self.assertEqual(template.params, [])
for template in (Template(self.name, self.params),
Template(name=self.name, params=self.params)):
self.assertEqual(template.params, self.params)

def test_getitem(self):
template = Template(name=self.name, params=self.params)
self.assertIs(template[0], self.bar)
self.assertIs(template[1], self.baz)
self.assertIs(template[2], self.eggs)
self.assertIs(template["1"], self.bar)
self.assertIs(template["2"], self.baz)
self.assertIs(template["eggs"], self.eggs)

def test_render(self):
tests = [
(Template(self.name), "{{foo}}"),
(Template(self.name, self.params), "{{foo|bar|baz|eggs=spam}}")
]
for template, rendered in tests:
self.assertEqual(template.render(), rendered)

def test_repr(self):
correct1= 'Template(name=foo, params={})'
correct2 = 'Template(name=foo, params={"1": "bar", "2": "baz", "eggs": "spam"})'
tests = [(Template(self.name), correct1),
(Template(self.name, self.params), correct2)]
for template, correct in tests:
self.assertEqual(repr(template), correct)
self.assertEqual(str(template), correct)

def test_cmp(self):
tmp1 = Template(self.name)
tmp2 = Template(name=self.name)
tmp3 = Template(self.name, [])
tmp4 = Template(name=self.name, params=[])
tmp5 = Template(self.name, self.params)
tmp6 = Template(name=self.name, params=self.params)

for tmpA, tmpB in permutations((tmp1, tmp2, tmp3, tmp4), 2):
self.assertEqual(tmpA, tmpB)

for tmpA, tmpB in permutations((tmp5, tmp6), 2):
self.assertEqual(tmpA, tmpB)

for tmpA in (tmp5, tmp6):
for tmpB in (tmp1, tmp2, tmp3, tmp4):
self.assertNotEqual(tmpA, tmpB)
self.assertNotEqual(tmpB, tmpA)

if __name__ == "__main__":
unittest.main(verbosity=2)

+ 108
- 0
tests/test_tokens.py View File

@@ -0,0 +1,108 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from __future__ import unicode_literals
import unittest

from mwparserfromhell.compat import py3k
from mwparserfromhell.parser import tokens

class TestTokens(unittest.TestCase):
"""Test cases for the Token class and its subclasses."""

def test_issubclass(self):
"""check that all classes within the tokens module are really Tokens"""
for name in tokens.__all__:
klass = getattr(tokens, name)
self.assertTrue(issubclass(klass, tokens.Token))
self.assertIsInstance(klass(), klass)
self.assertIsInstance(klass(), tokens.Token)

def test_attributes(self):
"""check that Token attributes can be managed properly"""
token1 = tokens.Token()
token2 = tokens.Token(foo="bar", baz=123)

self.assertEqual("bar", token2.foo)
self.assertEqual(123, token2.baz)
self.assertRaises(KeyError, lambda: token1.foo)
self.assertRaises(KeyError, lambda: token2.bar)

token1.spam = "eggs"
token2.foo = "ham"
del token2.baz

self.assertEqual("eggs", token1.spam)
self.assertEqual("ham", token2.foo)
self.assertRaises(KeyError, lambda: token2.baz)
self.assertRaises(KeyError, delattr, token2, "baz")

def test_repr(self):
"""check that repr() on a Token works as expected"""
token1 = tokens.Token()
token2 = tokens.Token(foo="bar", baz=123)
token3 = tokens.Text(text="earwig" * 100)
hundredchars = ("earwig" * 100)[:97] + "..."

self.assertEqual("Token()", repr(token1))
if py3k:
token2repr1 = "Token(foo='bar', baz=123)"
token2repr2 = "Token(baz=123, foo='bar')"
token3repr = "Text(text='" + hundredchars + "')"
else:
token2repr1 = "Token(foo=u'bar', baz=123)"
token2repr2 = "Token(baz=123, foo=u'bar')"
token3repr = "Text(text=u'" + hundredchars + "')"
token2repr = repr(token2)
self.assertTrue(token2repr == token2repr1 or token2repr == token2repr2)
self.assertEqual(token3repr, repr(token3))

def test_equality(self):
"""check that equivalent tokens are considered equal"""
token1 = tokens.Token()
token2 = tokens.Token()
token3 = tokens.Token(foo="bar", baz=123)
token4 = tokens.Text(text="asdf")
token5 = tokens.Text(text="asdf")
token6 = tokens.TemplateOpen(text="asdf")

self.assertEqual(token1, token2)
self.assertEqual(token2, token1)
self.assertEqual(token4, token5)
self.assertEqual(token5, token4)
self.assertNotEqual(token1, token3)
self.assertNotEqual(token2, token3)
self.assertNotEqual(token4, token6)
self.assertNotEqual(token5, token6)

def test_repr_equality(self):
"check that eval(repr(token)) == token"
tests = [
tokens.Token(),
tokens.Token(foo="bar", baz=123),
tokens.Text(text="earwig")
]
for token in tests:
self.assertEqual(token, eval(repr(token), vars(tokens)))

if __name__ == "__main__":
unittest.main(verbosity=2)

+ 67
- 0
tests/test_utils.py View File

@@ -0,0 +1,67 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from __future__ import unicode_literals
import unittest

from mwparserfromhell.nodes import Template, Text
from mwparserfromhell.smart_list import SmartList
from mwparserfromhell.utils import parse_anything
from mwparserfromhell.wikicode import Wikicode

from ._test_tree_equality import TreeEqualityTestCase

class TestUtils(TreeEqualityTestCase):
"""Tests for the utils module, which provides parse_anything()."""

def test_parse_anything_valid(self):
"""tests for valid input to utils.parse_anything()"""
wrap = lambda L: Wikicode(SmartList(L))
textify = lambda L: wrap([Text(item) for item in L])
tests = [
(wrap([Text("foobar")]), textify(["foobar"])),
(Template(wrap([Text("spam")])),
wrap([Template(textify(["spam"]))])),
("fóóbar", textify(["fóóbar"])),
(b"foob\xc3\xa1r", textify(["foobár"])),
(123, textify(["123"])),
(True, textify(["True"])),
(None, wrap([])),
([Text("foo"), Text("bar"), Text("baz")],
textify(["foo", "bar", "baz"])),
([wrap([Text("foo")]), Text("bar"), "baz", 123, 456],
textify(["foo", "bar", "baz", "123", "456"])),
([[[([[((("foo",),),)], "bar"],)]]], textify(["foo", "bar"]))
]
for test, valid in tests:
self.assertWikicodeEqual(valid, parse_anything(test))

def test_parse_anything_invalid(self):
"""tests for invalid input to utils.parse_anything()"""
self.assertRaises(ValueError, parse_anything, Ellipsis)
self.assertRaises(ValueError, parse_anything, object)
self.assertRaises(ValueError, parse_anything, object())
self.assertRaises(ValueError, parse_anything, type)
self.assertRaises(ValueError, parse_anything, ["foo", [object]])

if __name__ == "__main__":
unittest.main(verbosity=2)

+ 599
- 0
tests/tokenizer/templates.mwtest View File

@@ -0,0 +1,599 @@
name: no_params
label: simplest type of template
input: "{{template}}"
output: [TemplateOpen(), Text(text="template"), TemplateClose()]

---

name: one_param_unnamed
label: basic template with one unnamed parameter
input: "{{foo|bar}}"
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateClose()]

---

name: one_param_named
label: basic template with one named parameter
input: "{{foo|bar=baz}}"
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateClose()]

---

name: multiple_unnamed_params
label: basic template with multiple unnamed parameters
input: "{{foo|bar|baz|biz|buzz}}"
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamSeparator(), Text(text="buzz"), TemplateClose()]

---

name: multiple_named_params
label: basic template with multiple named parameters
input: "{{foo|bar=baz|biz=buzz|buff=baff|usr=bin}}"
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamEquals(), Text(text="buzz"), TemplateParamSeparator(), Text(text="buff"), TemplateParamEquals(), Text(text="baff"), TemplateParamSeparator(), Text(text="usr"), TemplateParamEquals(), Text(text="bin"), TemplateClose()]

---

name: multiple_mixed_params
label: basic template with multiple unnamed/named parameters
input: "{{foo|bar=baz|biz|buzz=buff|usr|bin}}"
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamSeparator(), Text(text="buzz"), TemplateParamEquals(), Text(text="buff"), TemplateParamSeparator(), Text(text="usr"), TemplateParamSeparator(), Text(text="bin"), TemplateClose()]

---

name: multiple_mixed_params2
label: basic template with multiple unnamed/named parameters in another order
input: "{{foo|bar|baz|biz=buzz|buff=baff|usr=bin}}"
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamEquals(), Text(text="buzz"), TemplateParamSeparator(), Text(text="buff"), TemplateParamEquals(), Text(text="baff"), TemplateParamSeparator(), Text(text="usr"), TemplateParamEquals(), Text(text="bin"), TemplateClose()]

---

name: nested_unnamed_param
label: nested template as an unnamed parameter
input: "{{foo|{{bar}}}}"
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateClose()]

---

name: nested_named_param_value
label: nested template as a parameter value with a named parameter
input: "{{foo|bar={{baz}}}}"
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()]

---

name: nested_named_param_name_and_value
label: nested templates as a parameter name and value
input: "{{foo|{{bar}}={{baz}}}}"
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()]

---

name: nested_name_start
label: nested template at the beginning of a template name
input: "{{{{foo}}bar}}"
output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose()]

---

name: nested_name_start_unnamed_param
label: nested template at the beginning of a template name and as an unnamed parameter
input: "{{{{foo}}bar|{{baz}}}}"
output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()]

---

name: nested_name_start_named_param_value
label: nested template at the beginning of a template name and as a parameter value with a named parameter
input: "{{{{foo}}bar|baz={{biz}}}}"
output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()]

---

name: nested_name_start_named_param_name_and_value
label: nested template at the beginning of a template name and as a parameter name and value
input: "{{{{foo}}bar|{{baz}}={{biz}}}}"
output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()]

---

name: nested_name_end
label: nested template at the end of a template name
input: "{{foo{{bar}}}}"
output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateClose()]

---

name: nested_name_end_unnamed_param
label: nested template at the end of a template name and as an unnamed parameter
input: "{{foo{{bar}}|{{baz}}}}"
output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()]

---

name: nested_name_end_named_param_value
label: nested template at the end of a template name and as a parameter value with a named parameter
input: "{{foo{{bar}}|baz={{biz}}}}"
output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()]

---

name: nested_name_end_named_param_name_and_value
label: nested template at the end of a template name and as a parameter name and value
input: "{{foo{{bar}}|{{baz}}={{biz}}}}"
output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()]

---

name: nested_name_mid
label: nested template in the middle of a template name
input: "{{foo{{bar}}baz}}"
output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose()]

---

name: nested_name_mid_unnamed_param
label: nested template in the middle of a template name and as an unnamed parameter
input: "{{foo{{bar}}baz|{{biz}}}}"
output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateParamSeparator(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()]

---

name: nested_name_mid_named_param_value
label: nested template in the middle of a template name and as a parameter value with a named parameter
input: "{{foo{{bar}}baz|biz={{buzz}}}}"
output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamEquals(), TemplateOpen(), Text(text="buzz"), TemplateClose(), TemplateClose()]

---

name: nested_name_mid_named_param_name_and_value
label: nested template in the middle of a template name and as a parameter name and value
input: "{{foo{{bar}}baz|{{biz}}={{buzz}}}}"
output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateParamSeparator(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="buzz"), TemplateClose(), TemplateClose()]

---

name: nested_name_start_end
label: nested template at the beginning and end of a template name
input: "{{{{foo}}{{bar}}}}"
output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateClose()]

---

name: nested_name_start_end_unnamed_param
label: nested template at the beginning and end of a template name and as an unnamed parameter
input: "{{{{foo}}{{bar}}|{{baz}}}}"
output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()]

---

name: nested_name_start_end_named_param_value
label: nested template at the beginning and end of a template name and as a parameter value with a named parameter
input: "{{{{foo}}{{bar}}|baz={{biz}}}}"
output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()]

---

name: nested_name_start_end_named_param_name_and_value
label: nested template at the beginning and end of a template name and as a parameter name and value
input: "{{{{foo}}{{bar}}|{{baz}}={{biz}}}}"
output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()]

---

name: nested_names_multiple
label: multiple nested templates within nested templates
input: "{{{{{{{{foo}}bar}}baz}}biz}}"
output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose(), Text(text="biz"), TemplateClose()]

---

name: nested_names_multiple_unnamed_param
label: multiple nested templates within nested templates with a nested unnamed parameter
input: "{{{{{{{{foo}}bar}}baz}}biz|{{buzz}}}}"
output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose(), Text(text="biz"), TemplateParamSeparator(), TemplateOpen(), Text(text="buzz"), TemplateClose(), TemplateClose()]

---

name: nested_names_multiple_named_param_value
label: multiple nested templates within nested templates with a nested parameter value in a named parameter
input: "{{{{{{{{foo}}bar}}baz}}biz|buzz={{bin}}}}"
output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose(), Text(text="biz"), TemplateParamSeparator(), Text(text="buzz"), TemplateParamEquals(), TemplateOpen(), Text(text="bin"), TemplateClose(), TemplateClose()]

---

name: nested_names_multiple_named_param_name_and_value
label: multiple nested templates within nested templates with a nested parameter name and value
input: "{{{{{{{{foo}}bar}}baz}}biz|{{buzz}}={{bin}}}}"
output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose(), Text(text="biz"), TemplateParamSeparator(), TemplateOpen(), Text(text="buzz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="bin"), TemplateClose(), TemplateClose()]

---

name: mixed_nested_templates
label: mixed assortment of nested templates within template names, parameter names, and values
input: "{{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}}"
output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), Text(text="biz"), TemplateClose(), Text(text="buzz"), TemplateClose(), Text(text="usr"), TemplateParamSeparator(), TemplateOpen(), Text(text="bin"), TemplateClose(), TemplateClose()]

---

name: newlines_start
label: a newline at the start of a template name
input: "{{\nfoobar}}"
output: [TemplateOpen(), Text(text="\nfoobar"), TemplateClose()]

---

name: newlines_end
label: a newline at the end of a template name
input: "{{foobar\n}}"
output: [TemplateOpen(), Text(text="foobar\n"), TemplateClose()]

---

name: newlines_start_end
label: a newline at the start and end of a template name
input: "{{\nfoobar\n}}"
output: [TemplateOpen(), Text(text="\nfoobar\n"), TemplateClose()]

---

name: newlines_mid
label: a newline at the middle of a template name
input: "{{foo\nbar}}"
output: [Text(text="{{foo\nbar}}")]

---

name: newlines_start_mid
label: a newline at the start and middle of a template name
input: "{{\nfoo\nbar}}"
output: [Text(text="{{\nfoo\nbar}}")]

---

name: newlines_mid_end
label: a newline at the middle and end of a template name
input: "{{foo\nbar\n}}"
output: [Text(text="{{foo\nbar\n}}")]

---

name: newlines_start_mid_end
label: a newline at the start, middle, and end of a template name
input: "{{\nfoo\nbar\n}}"
output: [Text(text="{{\nfoo\nbar\n}}")]

---

name: newlines_unnamed_param
label: newlines within an unnamed template parameter
input: "{{foo|\nb\nar\n}}"
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateClose()]

---

name: newlines_enclose_template_name_unnamed_param
label: newlines enclosing a template name and within an unnamed template parameter
input: "{{\nfoo\n|\nb\nar\n}}"
output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateClose()]

---

name: newlines_within_template_name_unnamed_param
label: newlines within a template name and within an unnamed template parameter
input: "{{\nfo\no\n|\nb\nar\n}}"
output: [Text(text="{{\nfo\no\n|\nb\nar\n}}")]

---

name: newlines_enclose_template_name_named_param_value
label: newlines enclosing a template name and within a named parameter value
input: "{{\nfoo\n|1=\nb\nar\n}}"
output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="\nb\nar\n"), TemplateClose()]

---

name: newlines_within_template_name_named_param_value
label: newlines within a template name and within a named parameter value
input: "{{\nf\noo\n|1=\nb\nar\n}}"
output: [Text(text="{{\nf\noo\n|1=\nb\nar\n}}")]

---

name: newlines_named_param_name
label: newlines within a parameter name
input: "{{foo|\nb\nar\n=baz}}"
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="baz"), TemplateClose()]

---

name: newlines_named_param_name_param_value
label: newlines within a parameter name and within a parameter value
input: "{{foo|\nb\nar\n=\nba\nz\n}}"
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="\nba\nz\n"), TemplateClose()]

---

name: newlines_enclose_template_name_named_param_name
label: newlines enclosing a template name and within a parameter name
input: "{{\nfoo\n|\nb\nar\n=baz}}"
output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="baz"), TemplateClose()]

---

name: newlines_enclose_template_name_named_param_name_param_value
label: newlines enclosing a template name and within a parameter name and within a parameter value
input: "{{\nfoo\n|\nb\nar\n=\nba\nz\n}}"
output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="\nba\nz\n"), TemplateClose()]

---

name: newlines_within_template_name_named_param_name
label: newlines within a template name and within a parameter name
input: "{{\nfo\no\n|\nb\nar\n=baz}}"
output: [Text(text="{{\nfo\no\n|\nb\nar\n=baz}}")]

---

name: newlines_within_template_name_named_param_name_param_value
label: newlines within a template name and within a parameter name and within a parameter value
input: "{{\nf\noo\n|\nb\nar\n=\nba\nz\n}}"
output: [Text(text="{{\nf\noo\n|\nb\nar\n=\nba\nz\n}}")]

---

name: newlines_wildcard
label: a random, complex assortment of templates and newlines
input: "{{\nfoo\n|\nb\nar\n=\nb\naz\n|\nb\nuz\n}}"
output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="\nb\naz\n"), TemplateParamSeparator(), Text(text="\nb\nuz\n"), TemplateClose()]

---

name: newlines_wildcard_redux
label: an even more random and complex assortment of templates and newlines
input: "{{\nfoo\n|\n{{\nbar\n|\nb\naz\n=\nb\niz\n}}\n=\nb\nuzz\n}}"
output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\n"), TemplateOpen(), Text(text="\nbar\n"), TemplateParamSeparator(), Text(text="\nb\naz\n"), TemplateParamEquals(), Text(text="\nb\niz\n"), TemplateClose(), Text(text="\n"), TemplateParamEquals(), Text(text="\nb\nuzz\n"), TemplateClose()]

---

name: newlines_wildcard_redux_invalid
label: a variation of the newlines_wildcard_redux test that is invalid
input: "{{\nfoo\n|\n{{\nb\nar\n|\nb\naz\n=\nb\niz\n}}\n=\nb\nuzz\n}}"
output: [Text(text="{{\nfoo\n|\n{{\nb\nar\n|\nb\naz\n=\nb\niz\n}}\n=\nb\nuzz\n}}")]

---

name: invalid_name_left_brace_middle
label: invalid characters in template name: left brace in middle
input: "{{foo{bar}}"
output: [Text(text="{{foo{bar}}")]

---

name: invalid_name_right_brace_middle
label: invalid characters in template name: right brace in middle
input: "{{foo}bar}}"
output: [Text(text="{{foo}bar}}")]

---

name: invalid_name_left_braces
label: invalid characters in template name: two left braces in middle
input: "{{foo{b{ar}}"
output: [Text(text="{{foo{b{ar}}")]

---

name: invalid_name_left_bracket_middle
label: invalid characters in template name: left bracket in middle
input: "{{foo[bar}}"
output: [Text(text="{{foo[bar}}")]

---

name: invalid_name_right_bracket_middle
label: invalid characters in template name: right bracket in middle
input: "{{foo]bar}}"
output: [Text(text="{{foo]bar}}")]

---

name: invalid_name_left_bracket_start
label: invalid characters in template name: left bracket at start
input: "{{[foobar}}"
output: [Text(text="{{[foobar}}")]

---

name: invalid_name_right_bracket_start
label: invalid characters in template name: right bracket at end
input: "{{foobar]}}"
output: [Text(text="{{foobar]}}")]

---

name: valid_name_left_brace_start
label: valid characters in template name: left brace at start
input: "{{{foobar}}"
output: [Text(text="{"), TemplateOpen(), Text(text="foobar"), TemplateClose()]

---

name: valid_unnamed_param_left_brace
label: valid characters in unnamed template parameter: left brace
input: "{{foo|ba{r}}"
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba{r"), TemplateClose()]

---

name: valid_unnamed_param_braces
label: valid characters in unnamed template parameter: left and right braces
input: "{{foo|ba{r}}}"
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba{r"), TemplateClose(), Text(text="}")]

---

name: valid_param_name_braces
label: valid characters in template parameter name: left and right braces
input: "{{foo|ba{r}=baz}}"
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba{r}"), TemplateParamEquals(), Text(text="baz"), TemplateClose()]

---

name: valid_param_name_brackets
label: valid characters in unnamed template parameter: left and right brackets
input: "{{foo|ba[r]=baz}}"
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba[r]"), TemplateParamEquals(), Text(text="baz"), TemplateClose()]

---

name: valid_param_name_double_left_brackets
label: valid characters in unnamed template parameter: double left brackets
input: "{{foo|bar[[in\nvalid=baz}}"
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar[[in\nvalid"), TemplateParamEquals(), Text(text="baz"), TemplateClose()]

---

name: valid_param_name_double_right_brackets
label: valid characters in unnamed template parameter: double right brackets
input: "{{foo|bar]]=baz}}"
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar]]"), TemplateParamEquals(), Text(text="baz"), TemplateClose()]

---

name: valid_param_name_double_brackets
label: valid characters in unnamed template parameter: double left and right brackets
input: "{{foo|bar[[in\nvalid]]=baz}}"
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar[[in\nvalid]]"), TemplateParamEquals(), Text(text="baz"), TemplateClose()]

---

name: invalid_param_name_double_left_braces
label: invalid characters in template parameter name: double left braces
input: "{{foo|bar{{in\nvalid=baz}}"
output: [Text(text="{{foo|bar{{in\nvalid=baz}}")]

---

name: invalid_param_name_double_braces
label: invalid characters in template parameter name: double left and right braces
input: "{{foo|bar{{in\nvalid}}=baz}}"
output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar{{in\nvalid"), TemplateClose(), Text(text="=baz}}")]

---

name: incomplete_plain
label: incomplete templates that should fail gracefully: no close whatsoever
input: "{{stuff}} {{foobar"
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foobar")]

---

name: incomplete_right_brace
label: incomplete templates that should fail gracefully: only one right brace
input: "{{stuff}} {{foobar}"
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foobar}")]

---

name: incomplete_pipe
label: incomplete templates that should fail gracefully: a pipe
input: "{{stuff}} {{foobar|"
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foobar|")]

---

name: incomplete_unnamed_param
label: incomplete templates that should fail gracefully: an unnamed parameter
input: "{{stuff}} {{foo|bar"
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar")]

---

name: incomplete_unnamed_param_pipe
label: incomplete templates that should fail gracefully: an unnamed parameter, then a pipe
input: "{{stuff}} {{foo|bar|"
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar|")]

---

name: incomplete_valueless_param
label: incomplete templates that should fail gracefully: an a named parameter with no value
input: "{{stuff}} {{foo|bar="
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=")]

---

name: incomplete_valueless_param_pipe
label: incomplete templates that should fail gracefully: a named parameter with no value, then a pipe
input: "{{stuff}} {{foo|bar=|"
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=|")]

---

name: incomplete_named_param
label: incomplete templates that should fail gracefully: a named parameter with a value
input: "{{stuff}} {{foo|bar=baz"
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz")]

---

name: incomplete_named_param_pipe
label: incomplete templates that should fail gracefully: a named parameter with a value, then a paipe
input: "{{stuff}} {{foo|bar=baz|"
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz|")]

---

name: incomplete_two_unnamed_params
label: incomplete templates that should fail gracefully: two unnamed parameters
input: "{{stuff}} {{foo|bar|baz"
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar|baz")]

---

name: incomplete_unnamed_param_valueless_param
label: incomplete templates that should fail gracefully: an unnamed parameter, then a named parameter with no value
input: "{{stuff}} {{foo|bar|baz="
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar|baz=")]

---

name: incomplete_unnamed_param_named_param
label: incomplete templates that should fail gracefully: an unnamed parameter, then a named parameter with a value
input: "{{stuff}} {{foo|bar|baz=biz"
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar|baz=biz")]

---

name: incomplete_named_param_unnamed_param
label: incomplete templates that should fail gracefully: a named parameter with a value, then an unnamed parameter
input: "{{stuff}} {{foo|bar=baz|biz"
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz|biz")]

---

name: incomplete_named_param_valueless_param
label: incomplete templates that should fail gracefully: a named parameter with a value, then a named parameter with no value
input: "{{stuff}} {{foo|bar=baz|biz="
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz|biz=")]

---

name: incomplete_two_named_params
label: incomplete templates that should fail gracefully: two named parameters with values
input: "{{stuff}} {{foo|bar=baz|biz=buzz"
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz|biz=buzz")]

---

name: incomplete_nested_template_as_unnamed_param
label: incomplete templates that should fail gracefully: a valid nested template as an unnamed parameter
input: "{{stuff}} {{foo|{{bar}}"
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|"), TemplateOpen(), Text(text="bar"), TemplateClose()]

---

name: incomplete_nested_template_as_param_value
label: incomplete templates that should fail gracefully: a valid nested template as a parameter value
input: "{{stuff}} {{foo|bar={{baz}}"
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar="), TemplateOpen(), Text(text="baz"), TemplateClose()]

+ 25
- 0
tests/tokenizer/text.mwtest View File

@@ -0,0 +1,25 @@
name: basic
label: sanity check for basic text parsing, no gimmicks
input: "foobar"
output: [Text(text="foobar")]

---

name: newlines
label: slightly more complex text parsing, with newlines
input: "This is a line of text.\nThis is another line of text.\nThis is another."
output: [Text(text="This is a line of text.\nThis is another line of text.\nThis is another.")]

---

name: unicode
label: ensure unicode data is handled properly
input: "Thís ís å sëñtënce with diœcritiçs."
output: [Text(text="Thís ís å sëñtënce with diœcritiçs.")]

---

name: unicode2
label: additional unicode check for non-BMP codepoints
input: "𐌲𐌿𐍄𐌰𐍂𐌰𐌶𐌳𐌰"
output: [Text(text="𐌲𐌿𐍄𐌰𐍂𐌰𐌶𐌳𐌰")]

Loading…
Cancel
Save