From 06d3036de29dde3414671a73ea5697fd82310578 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Tue, 11 Dec 2012 18:07:13 -0500 Subject: [PATCH 001/115] Fix a certain bug in SmartList with Py3k (closes #17) --- mwparserfromhell/smart_list.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mwparserfromhell/smart_list.py b/mwparserfromhell/smart_list.py index 9a77e19..5df6a0e 100644 --- a/mwparserfromhell/smart_list.py +++ b/mwparserfromhell/smart_list.py @@ -76,7 +76,7 @@ class SmartList(list): def __getitem__(self, key): if not isinstance(key, slice): return super(SmartList, self).__getitem__(key) - sliceinfo = [key.start, key.stop, 1 if not key.step else key.step] + sliceinfo = [key.start or 0, key.stop or 0, key.step or 1] child = _ListProxy(self, sliceinfo) self._children[id(child)] = (child, sliceinfo) return child From 6881caf0bd64389adcec8956dc97256f8be3d46c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 31 Dec 2012 03:09:11 -0500 Subject: [PATCH 002/115] Fix StringMixIn's methods taking option arguments (thanks Sigma). 
--- mwparserfromhell/string_mixin.py | 40 +++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index d63f25d..075ae03 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -125,19 +125,29 @@ class StringMixIn(object): @inheritdoc def center(self, width, fillchar=None): + if fillchar is None: + return self.__unicode__().center(width) return self.__unicode__().center(width, fillchar) @inheritdoc - def count(self, sub=None, start=None, end=None): + def count(self, sub, start=None, end=None): return self.__unicode__().count(sub, start, end) if not py3k: @inheritdoc def decode(self, encoding=None, errors=None): + if errors is None: + if encoding is None: + return self.__unicode__().decode() + return self.__unicode__().decode(encoding) return self.__unicode__().decode(encoding, errors) @inheritdoc def encode(self, encoding=None, errors=None): + if errors is None: + if encoding is None: + return self.__unicode__().encode() + return self.__unicode__().encode(encoding) return self.__unicode__().encode(encoding, errors) @inheritdoc @@ -146,10 +156,12 @@ class StringMixIn(object): @inheritdoc def expandtabs(self, tabsize=None): + if tabsize is None: + return self.__unicode__().expandtabs() return self.__unicode__().expandtabs(tabsize) @inheritdoc - def find(self, sub=None, start=None, end=None): + def find(self, sub, start=None, end=None): return self.__unicode__().find(sub, start, end) @inheritdoc @@ -157,7 +169,7 @@ class StringMixIn(object): return self.__unicode__().format(*args, **kwargs) @inheritdoc - def index(self, sub=None, start=None, end=None): + def index(self, sub, start=None, end=None): return self.__unicode__().index(sub, start, end) @inheritdoc @@ -202,6 +214,8 @@ class StringMixIn(object): @inheritdoc def ljust(self, width, fillchar=None): + if fillchar is None: + return self.__unicode__().ljust(width) return 
self.__unicode__().ljust(width, fillchar) @inheritdoc @@ -221,15 +235,17 @@ class StringMixIn(object): return self.__unicode__().replace(old, new, count) @inheritdoc - def rfind(self, sub=None, start=None, end=None): + def rfind(self, sub, start=None, end=None): return self.__unicode__().rfind(sub, start, end) @inheritdoc - def rindex(self, sub=None, start=None, end=None): + def rindex(self, sub, start=None, end=None): return self.__unicode__().rindex(sub, start, end) @inheritdoc def rjust(self, width, fillchar=None): + if fillchar is None: + return self.__unicode__().rjust(width) return self.__unicode__().rjust(width, fillchar) @inheritdoc @@ -238,6 +254,10 @@ class StringMixIn(object): @inheritdoc def rsplit(self, sep=None, maxsplit=None): + if maxsplit is None: + if sep is None: + return self.__unicode__().rsplit() + return self.__unicode__().rsplit(sep) return self.__unicode__().rsplit(sep, maxsplit) @inheritdoc @@ -246,10 +266,16 @@ class StringMixIn(object): @inheritdoc def split(self, sep=None, maxsplit=None): + if maxsplit is None: + if sep is None: + return self.__unicode__().split() + return self.__unicode__().split(sep) return self.__unicode__().split(sep, maxsplit) @inheritdoc def splitlines(self, keepends=None): + if keepends is None: + return self.__unicode__().splitlines() return self.__unicode__().splitlines(keepends) @inheritdoc @@ -269,8 +295,8 @@ class StringMixIn(object): return self.__unicode__().title() @inheritdoc - def translate(self, table, deletechars=None): - return self.__unicode__().translate(table, deletechars) + def translate(self, table): + return self.__unicode__().translate(table) @inheritdoc def upper(self): From 11cf5def7538ee8fc3954aab8bc9107d39d87c7f Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 7 Jan 2013 17:47:05 -0500 Subject: [PATCH 003/115] Fix handling of sections headers with equal signs (closes #20) --- mwparserfromhell/parser/tokenizer.c | 44 ++++++++++++++++-------------------- 
mwparserfromhell/parser/tokenizer.py | 4 ++-- 2 files changed, 22 insertions(+), 26 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index cc1b4dd..40ec723 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -759,11 +759,10 @@ Tokenizer_parse_heading(Tokenizer* self) if (BAD_ROUTE) { RESET_ROUTE(); self->head = reset + best - 1; - char text[best + 1]; - for (i = 0; i < best; i++) text[i] = *"="; - text[best] = *""; - if (Tokenizer_write_text_then_stack(self, text)) - return -1; + for (i = 0; i < best; i++) { + if (Tokenizer_write_text(self, *"=")) + return -1; + } self->global ^= GL_HEADING; return 0; } @@ -799,13 +798,12 @@ Tokenizer_parse_heading(Tokenizer* self) Py_DECREF(token); if (heading->level < best) { diff = best - heading->level; - char difftext[diff + 1]; - for (i = 0; i < diff; i++) difftext[i] = *"="; - difftext[diff] = *""; - if (Tokenizer_write_text_then_stack(self, difftext)) { - Py_DECREF(heading->title); - free(heading); - return -1; + for (i = 0; i < diff; i++) { + if (Tokenizer_write_text(self, *"=")) { + Py_DECREF(heading->title); + free(heading); + return -1; + } } } if (Tokenizer_write_all(self, heading->title)) { @@ -851,22 +849,20 @@ Tokenizer_handle_heading_end(Tokenizer* self) RESET_ROUTE(); if (level < best) { diff = best - level; - char difftext[diff + 1]; - for (i = 0; i < diff; i++) difftext[i] = *"="; - difftext[diff] = *""; - if (Tokenizer_write_text_then_stack(self, difftext)) - return NULL; + for (i = 0; i < diff; i++) { + if (Tokenizer_write_text(self, *"=")) + return NULL; + } } self->head = reset + best - 1; } else { - char text[best + 1]; - for (i = 0; i < best; i++) text[i] = *"="; - text[best] = *""; - if (Tokenizer_write_text_then_stack(self, text)) { - Py_DECREF(after->title); - free(after); - return NULL; + for (i = 0; i < best; i++) { + if (Tokenizer_write_text(self, *"=")) { + Py_DECREF(after->title); + free(after); + return 
NULL; + } } if (Tokenizer_write_all(self, after->title)) { Py_DECREF(after->title); diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 5b0e976..455079a 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -339,14 +339,14 @@ class Tokenizer(object): current = int(log(self._context / contexts.HEADING_LEVEL_1, 2)) + 1 level = min(current, min(best, 6)) - try: + try: # Try to check for a heading closure after this one after, after_level = self._parse(self._context) except BadRoute: if level < best: self._write_text("=" * (best - level)) self._head = reset + best - 1 return self._pop(), level - else: + else: # Found another closure self._write_text("=" * best) self._write_all(after) return self._pop(), after_level From cd5cc6a7d084a464d67d6b52f369ae81eb31b376 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 17 Jan 2013 02:26:04 -0500 Subject: [PATCH 004/115] Update copyright notices for 2013. --- LICENSE | 2 +- docs/conf.py | 2 +- mwparserfromhell/__init__.py | 4 ++-- mwparserfromhell/nodes/__init__.py | 2 +- mwparserfromhell/nodes/argument.py | 2 +- mwparserfromhell/nodes/comment.py | 2 +- mwparserfromhell/nodes/extras/__init__.py | 2 +- mwparserfromhell/nodes/extras/attribute.py | 2 +- mwparserfromhell/nodes/extras/parameter.py | 2 +- mwparserfromhell/nodes/heading.py | 2 +- mwparserfromhell/nodes/html_entity.py | 2 +- mwparserfromhell/nodes/tag.py | 2 +- mwparserfromhell/nodes/template.py | 2 +- mwparserfromhell/nodes/text.py | 2 +- mwparserfromhell/nodes/wikilink.py | 2 +- mwparserfromhell/parser/__init__.py | 2 +- mwparserfromhell/parser/builder.py | 2 +- mwparserfromhell/parser/contexts.py | 2 +- mwparserfromhell/parser/tokenizer.py | 2 +- mwparserfromhell/parser/tokens.py | 2 +- mwparserfromhell/smart_list.py | 2 +- mwparserfromhell/string_mixin.py | 2 +- mwparserfromhell/utils.py | 2 +- mwparserfromhell/wikicode.py | 2 +- setup.py | 2 +- 25 files changed, 26 
insertions(+), 26 deletions(-) diff --git a/LICENSE b/LICENSE index 49b719e..413f1c4 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (C) 2012 Ben Kurtovic +Copyright (C) 2012-2013 Ben Kurtovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/docs/conf.py b/docs/conf.py index cff089b..9fa1e02 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -42,7 +42,7 @@ master_doc = 'index' # General information about the project. project = u'mwparserfromhell' -copyright = u'2012 Ben Kurtovic' +copyright = u'2012, 2013 Ben Kurtovic' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py index 4f73a0e..e18000b 100644 --- a/mwparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -29,7 +29,7 @@ outrageously powerful parser for `MediaWiki `_ wikicode. 
from __future__ import unicode_literals __author__ = "Ben Kurtovic" -__copyright__ = "Copyright (C) 2012 Ben Kurtovic" +__copyright__ = "Copyright (C) 2012, 2013 Ben Kurtovic" __license__ = "MIT License" __version__ = "0.2.dev" __email__ = "ben.kurtovic@verizon.net" diff --git a/mwparserfromhell/nodes/__init__.py b/mwparserfromhell/nodes/__init__.py index 86a8746..faaa0b2 100644 --- a/mwparserfromhell/nodes/__init__.py +++ b/mwparserfromhell/nodes/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/argument.py b/mwparserfromhell/nodes/argument.py index 918fac6..06facb4 100644 --- a/mwparserfromhell/nodes/argument.py +++ b/mwparserfromhell/nodes/argument.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/comment.py b/mwparserfromhell/nodes/comment.py index 3d06261..b34c29e 100644 --- a/mwparserfromhell/nodes/comment.py +++ b/mwparserfromhell/nodes/comment.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/extras/__init__.py b/mwparserfromhell/nodes/extras/__init__.py index 2ce4bb1..e860f01 100644 --- a/mwparserfromhell/nodes/extras/__init__.py +++ b/mwparserfromhell/nodes/extras/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# 
Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/extras/attribute.py b/mwparserfromhell/nodes/extras/attribute.py index 648bca0..ebb65ab 100644 --- a/mwparserfromhell/nodes/extras/attribute.py +++ b/mwparserfromhell/nodes/extras/attribute.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/extras/parameter.py b/mwparserfromhell/nodes/extras/parameter.py index 8c5e654..c1c10a0 100644 --- a/mwparserfromhell/nodes/extras/parameter.py +++ b/mwparserfromhell/nodes/extras/parameter.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/heading.py b/mwparserfromhell/nodes/heading.py index 8f389d3..f001234 100644 --- a/mwparserfromhell/nodes/heading.py +++ b/mwparserfromhell/nodes/heading.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/html_entity.py b/mwparserfromhell/nodes/html_entity.py index a3c6079..221040b 100644 --- a/mwparserfromhell/nodes/html_entity.py +++ b/mwparserfromhell/nodes/html_entity.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben 
Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/tag.py b/mwparserfromhell/nodes/tag.py index 5873a49..eaf2b6e 100644 --- a/mwparserfromhell/nodes/tag.py +++ b/mwparserfromhell/nodes/tag.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index 08ab4a5..e34ba7a 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/text.py b/mwparserfromhell/nodes/text.py index 783d8eb..06e5144 100644 --- a/mwparserfromhell/nodes/text.py +++ b/mwparserfromhell/nodes/text.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/nodes/wikilink.py b/mwparserfromhell/nodes/wikilink.py index 73f2a8d..f880016 100644 --- a/mwparserfromhell/nodes/wikilink.py +++ b/mwparserfromhell/nodes/wikilink.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated 
documentation files (the "Software"), to deal diff --git a/mwparserfromhell/parser/__init__.py b/mwparserfromhell/parser/__init__.py index 5baa687..074b9ba 100644 --- a/mwparserfromhell/parser/__init__.py +++ b/mwparserfromhell/parser/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/parser/builder.py b/mwparserfromhell/parser/builder.py index 61a8209..2cd7831 100644 --- a/mwparserfromhell/parser/builder.py +++ b/mwparserfromhell/parser/builder.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/parser/contexts.py b/mwparserfromhell/parser/contexts.py index 9d41870..b65946c 100644 --- a/mwparserfromhell/parser/contexts.py +++ b/mwparserfromhell/parser/contexts.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 455079a..a2b405c 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git 
a/mwparserfromhell/parser/tokens.py b/mwparserfromhell/parser/tokens.py index d23810e..b11ca15 100644 --- a/mwparserfromhell/parser/tokens.py +++ b/mwparserfromhell/parser/tokens.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/smart_list.py b/mwparserfromhell/smart_list.py index 5df6a0e..625307f 100644 --- a/mwparserfromhell/smart_list.py +++ b/mwparserfromhell/smart_list.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index 075ae03..d7a0749 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/utils.py b/mwparserfromhell/utils.py index 41a2044..83264e2 100644 --- a/mwparserfromhell/utils.py +++ b/mwparserfromhell/utils.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index 2c532f5..8d8ebe2 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py 
@@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/setup.py b/setup.py index cc034e5..445473e 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ #! /usr/bin/env python # -*- coding: utf-8 -*- # -# Copyright (C) 2012 Ben Kurtovic +# Copyright (C) 2012-2013 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal From d6f2723a06c45d92e478cffeedf3ce2c4be21a43 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 24 Jan 2013 03:07:36 -0500 Subject: [PATCH 005/115] Fix safety checks on template params in some odd cases (closes #24). Also, fix parsing of wikilinks in both tokenizers such that newlines in any location within the title are an automatic failure. 
--- mwparserfromhell/parser/tokenizer.c | 57 ++++++++++++++++++++++++++---------- mwparserfromhell/parser/tokenizer.h | 1 + mwparserfromhell/parser/tokenizer.py | 16 ++++++---- 3 files changed, 52 insertions(+), 22 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 40ec723..09649a7 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -1144,17 +1144,24 @@ Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) Tokenizer_fail_route(self); return; } - if (context & (LC_TEMPLATE_NAME | LC_WIKILINK_TITLE)) { - if (data == *"{" || data == *"}" || data == *"[" || data == *"]") { + if (context & LC_WIKILINK_TITLE) { + if (data == *"]" || data == *"{") + self->topstack->context |= LC_FAIL_NEXT; + else if (data == *"\n" || data == *"[" || data == *"}") + Tokenizer_fail_route(self); + return; + } + if (context & LC_TEMPLATE_NAME) { + if (data == *"{" || data == *"}" || data == *"[") { self->topstack->context |= LC_FAIL_NEXT; return; } - if (data == *"|") { - if (context & LC_FAIL_ON_TEXT) { - self->topstack->context ^= LC_FAIL_ON_TEXT; - return; - } + if (data == *"]") { + Tokenizer_fail_route(self); + return; } + if (data == *"|") + return; } else if (context & (LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME)) { if (context & LC_FAIL_ON_EQUALS) { @@ -1210,6 +1217,28 @@ Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) } /* + Unset any safety-checking contexts set by Tokenizer_verify_safe(). Used + when we preserve a context but previous data becomes invalid, like when + moving between template parameters. 
+*/ +static void +Tokenizer_reset_safety_checks(Tokenizer* self) +{ + static int checks[] = { + LC_HAS_TEXT, LC_FAIL_ON_TEXT, LC_FAIL_NEXT, LC_FAIL_ON_LBRACE, + LC_FAIL_ON_RBRACE, LC_FAIL_ON_EQUALS, 0}; + int context = self->topstack->context, i = 0, this; + while (1) { + this = checks[i]; + if (!this) + return; + if (context & this) + self->topstack->context ^= this; + i++; + } +} + +/* Parse the wikicode string, using context for when to stop. */ static PyObject* @@ -1274,6 +1303,7 @@ Tokenizer_parse(Tokenizer* self, int context) self->topstack->context ^= LC_FAIL_NEXT; } else if (this == *"|" && this_context & LC_TEMPLATE) { + Tokenizer_reset_safety_checks(self); if (Tokenizer_handle_template_param(self)) return NULL; } @@ -1294,15 +1324,10 @@ Tokenizer_parse(Tokenizer* self, int context) Tokenizer_write_text(self, this); } else if (this == next && next == *"[") { - if (!(this_context & LC_WIKILINK_TITLE)) { - if (Tokenizer_parse_wikilink(self)) - return NULL; - if (self->topstack->context & LC_FAIL_NEXT) - self->topstack->context ^= LC_FAIL_NEXT; - } - else { - Tokenizer_write_text(self, this); - } + if (Tokenizer_parse_wikilink(self)) + return NULL; + if (self->topstack->context & LC_FAIL_NEXT) + self->topstack->context ^= LC_FAIL_NEXT; } else if (this == *"|" && this_context & LC_WIKILINK_TITLE) { if (Tokenizer_handle_wikilink_separator(self)) diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index dffa0fb..3293a8f 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -206,6 +206,7 @@ static int Tokenizer_really_parse_entity(Tokenizer*); static int Tokenizer_parse_entity(Tokenizer*); static int Tokenizer_parse_comment(Tokenizer*); static void Tokenizer_verify_safe(Tokenizer*, int, Py_UNICODE); +static void Tokenizer_reset_safety_checks(Tokenizer*); static PyObject* Tokenizer_parse(Tokenizer*, int); static PyObject* Tokenizer_tokenize(Tokenizer*, PyObject*); diff --git 
a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index a2b405c..eead131 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -213,17 +213,21 @@ class Tokenizer(object): self._write_all(argument) self._write(tokens.ArgumentClose()) - def _verify_safe(self, unsafes): + def _verify_safe(self, unsafes, strip=True): """Verify that there are no unsafe characters in the current stack. The route will be failed if the name contains any element of *unsafes* - in it (not merely at the beginning or end). This is used when parsing a - template name or parameter key, which cannot contain newlines. + in it. This is used when parsing template names, parameter keys, and so + on, which cannot contain newlines and some other characters. If *strip* + is ``True``, the text will be stripped of whitespace, since this is + allowed at the ends of certain elements but not between text. """ self._push_textbuffer() if self._stack: text = [tok for tok in self._stack if isinstance(tok, tokens.Text)] - text = "".join([token.text for token in text]).strip() + text = "".join([token.text for token in text]) + if strip: + text = text.strip() if text and any([unsafe in text for unsafe in unsafes]): self._fail_route() @@ -291,7 +295,7 @@ class Tokenizer(object): def _handle_wikilink_separator(self): """Handle the separator between a wikilink's title and its text.""" - self._verify_safe(["\n", "{", "}", "[", "]"]) + self._verify_safe(["\n", "{", "}", "[", "]"], strip=False) self._context ^= contexts.WIKILINK_TITLE self._context |= contexts.WIKILINK_TEXT self._write(tokens.WikilinkSeparator()) @@ -299,7 +303,7 @@ class Tokenizer(object): def _handle_wikilink_end(self): """Handle the end of a wikilink at the head of the string.""" if self._context & contexts.WIKILINK_TITLE: - self._verify_safe(["\n", "{", "}", "[", "]"]) + self._verify_safe(["\n", "{", "}", "[", "]"], strip=False) self._head += 1 return self._pop() From 
357e421fe87c21c9cb8252333d4bf5f5d1d26dfa Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 24 Jan 2013 03:31:14 -0500 Subject: [PATCH 006/115] Text nodes should now appear a bit better in tree form. --- mwparserfromhell/nodes/text.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mwparserfromhell/nodes/text.py b/mwparserfromhell/nodes/text.py index 06e5144..60ba847 100644 --- a/mwparserfromhell/nodes/text.py +++ b/mwparserfromhell/nodes/text.py @@ -39,6 +39,9 @@ class Text(Node): def __strip__(self, normalize, collapse): return self + def __showtree__(self, write, get, mark): + write(str(self).encode("unicode_escape").decode("utf8")) + @property def value(self): """The actual text itself.""" From 28b124a96c8685c8c94eb51d8d29d508e8fe198c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 2 Feb 2013 23:10:58 -0500 Subject: [PATCH 007/115] Restarting work on unit tests with a test covering doc examples. --- README.rst | 4 +- mwparserfromhell/compat.py | 2 + tests/test_docs.py | 117 ++++++++++++++++++++++++++++++++++++++++++++ tests/test_parameter.py | 119 --------------------------------------------- tests/test_parser.py | 63 ------------------------ tests/test_template.py | 106 ---------------------------------------- 6 files changed, 122 insertions(+), 289 deletions(-) create mode 100644 tests/test_docs.py delete mode 100644 tests/test_parameter.py delete mode 100644 tests/test_parser.py delete mode 100644 tests/test_template.py diff --git a/README.rst b/README.rst index 77f12c7..3901103 100644 --- a/README.rst +++ b/README.rst @@ -124,7 +124,9 @@ following code (via the API_):: import mwparserfromhell API_URL = "http://en.wikipedia.org/w/api.php" def parse(title): - raw = urllib.urlopen(API_URL, data).read() + data = {"action": "query", "prop": "revisions", "rvlimit": 1, + "rvprop": "content", "format": "json", "titles": title} + raw = urllib.urlopen(API_URL, urllib.urlencode(data)).read() res = json.loads(raw) text = 
res["query"]["pages"].values()[0]["revisions"][0]["*"] return mwparserfromhell.parse(text) diff --git a/mwparserfromhell/compat.py b/mwparserfromhell/compat.py index a1b6b8f..576c2c5 100755 --- a/mwparserfromhell/compat.py +++ b/mwparserfromhell/compat.py @@ -18,6 +18,7 @@ if py3k: basestring = str maxsize = sys.maxsize import html.entities as htmlentities + from io import StringIO else: bytes = str @@ -25,5 +26,6 @@ else: basestring = basestring maxsize = sys.maxint import htmlentitydefs as htmlentities + from StringIO import StringIO del sys diff --git a/tests/test_docs.py b/tests/test_docs.py new file mode 100644 index 0000000..5ec25e1 --- /dev/null +++ b/tests/test_docs.py @@ -0,0 +1,117 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import print_function, unicode_literals +import json +import unittest +import urllib + +import mwparserfromhell +from mwparserfromhell.compat import py3k, str, StringIO + +class TestDocs(unittest.TestCase): + def assertPrint(self, input, output): + """Assertion check that *input*, when printed, produces *output*.""" + buff = StringIO() + print(input, end="", file=buff) + buff.seek(0) + self.assertEqual(buff.read(), output) + + def test_readme_1(self): + text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?" + wikicode = mwparserfromhell.parse(text) + self.assertPrint(wikicode, + "I has a template! {{foo|bar|baz|eggs=spam}} See it?") + templates = wikicode.filter_templates() + if py3k: + self.assertPrint(templates, "['{{foo|bar|baz|eggs=spam}}']") + else: + self.assertPrint(templates, "[u'{{foo|bar|baz|eggs=spam}}']") + template = templates[0] + self.assertPrint(template.name, "foo") + if py3k: + self.assertPrint(template.params, "['bar', 'baz', 'eggs=spam']") + else: + self.assertPrint(template.params, "[u'bar', u'baz', u'eggs=spam']") + self.assertPrint(template.get(1).value, "bar") + self.assertPrint(template.get("eggs").value, "spam") + + def test_readme_2(self): + code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}") + if py3k: + self.assertPrint(code.filter_templates(), + "['{{foo|this {{includes a|template}}}}']") + else: + self.assertPrint(code.filter_templates(), + "[u'{{foo|this {{includes a|template}}}}']") + foo = code.filter_templates()[0] + self.assertPrint(foo.get(1).value, "this {{includes a|template}}") + self.assertPrint(foo.get(1).value.filter_templates()[0], + "{{includes a|template}}") + self.assertPrint(foo.get(1).value.filter_templates()[0].get(1).value, + "template") + + def test_readme_3(self): + text = "{{foo|{{bar}}={{baz|{{spam}}}}}}" + temps = mwparserfromhell.parse(text).filter_templates(recursive=True) + if py3k: + res = "['{{foo|{{bar}}={{baz|{{spam}}}}}}', '{{bar}}', '{{baz|{{spam}}}}', 
'{{spam}}']" + else: + res = "[u'{{foo|{{bar}}={{baz|{{spam}}}}}}', u'{{bar}}', u'{{baz|{{spam}}}}', u'{{spam}}']" + self.assertPrint(temps, res) + + def test_readme_4(self): + text = "{{cleanup}} '''Foo''' is a [[bar]]. {{uncategorized}}" + code = mwparserfromhell.parse(text) + for template in code.filter_templates(): + if template.name == "cleanup" and not template.has_param("date"): + template.add("date", "July 2012") + res = "{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{uncategorized}}" + self.assertPrint(code, res) + code.replace("{{uncategorized}}", "{{bar-stub}}") + res = "{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}}" + self.assertPrint(code, res) + if py3k: + res = "['{{cleanup|date=July 2012}}', '{{bar-stub}}']" + else: + res = "[u'{{cleanup|date=July 2012}}', u'{{bar-stub}}']" + self.assertPrint(code.filter_templates(), res) + text = str(code) + res = "{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}}" + self.assertPrint(text, res) + self.assertEqual(text, code) + + def test_readme_5(self): + url1 = "http://en.wikipedia.org/w/api.php" + url2 = "http://en.wikipedia.org/w/index.php?title={0}&action=raw" + title = "Test" + data = {"action": "query", "prop": "revisions", "rvlimit": 1, + "rvprop": "content", "format": "json", "titles": title} + raw = urllib.urlopen(url1, urllib.urlencode(data)).read() + res = json.loads(raw) + text = res["query"]["pages"].values()[0]["revisions"][0]["*"] + actual = mwparserfromhell.parse(text) + expected = urllib.urlopen(url2.format(title)).read().decode("utf8") + self.assertEqual(actual, expected) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_parameter.py b/tests/test_parameter.py deleted file mode 100644 index 2d5515b..0000000 --- a/tests/test_parameter.py +++ /dev/null @@ -1,119 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2012 Ben Kurtovic -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this 
software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -import unittest - -from mwparserfromhell.parameter import Parameter -from mwparserfromhell.template import Template - -class TestParameter(unittest.TestCase): - def setUp(self): - self.name = "foo" - self.value1 = "bar" - self.value2 = "{{spam}}" - self.value3 = "bar{{spam}}" - self.value4 = "embedded {{eggs|spam|baz=buz}} {{goes}} here" - self.templates2 = [Template("spam")] - self.templates3 = [Template("spam")] - self.templates4 = [Template("eggs", [Parameter("1", "spam"), - Parameter("baz", "buz")]), - Template("goes")] - - def test_construct(self): - Parameter(self.name, self.value1) - Parameter(self.name, self.value2, self.templates2) - Parameter(name=self.name, value=self.value3) - Parameter(name=self.name, value=self.value4, templates=self.templates4) - - def test_name(self): - params = [ - Parameter(self.name, self.value1), - Parameter(self.name, self.value2, self.templates2), - Parameter(name=self.name, value=self.value3), - Parameter(name=self.name, value=self.value4, - 
templates=self.templates4) - ] - for param in params: - self.assertEqual(param.name, self.name) - - def test_value(self): - tests = [ - (Parameter(self.name, self.value1), self.value1), - (Parameter(self.name, self.value2, self.templates2), self.value2), - (Parameter(name=self.name, value=self.value3), self.value3), - (Parameter(name=self.name, value=self.value4, - templates=self.templates4), self.value4) - ] - for param, correct in tests: - self.assertEqual(param.value, correct) - - def test_templates(self): - tests = [ - (Parameter(self.name, self.value3, self.templates3), - self.templates3), - (Parameter(name=self.name, value=self.value4, - templates=self.templates4), self.templates4) - ] - for param, correct in tests: - self.assertEqual(param.templates, correct) - - def test_magic(self): - params = [Parameter(self.name, self.value1), - Parameter(self.name, self.value2, self.templates2), - Parameter(self.name, self.value3, self.templates3), - Parameter(self.name, self.value4, self.templates4)] - for param in params: - self.assertEqual(repr(param), repr(param.value)) - self.assertEqual(str(param), str(param.value)) - self.assertIs(param < "eggs", param.value < "eggs") - self.assertIs(param <= "bar{{spam}}", param.value <= "bar{{spam}}") - self.assertIs(param == "bar", param.value == "bar") - self.assertIs(param != "bar", param.value != "bar") - self.assertIs(param > "eggs", param.value > "eggs") - self.assertIs(param >= "bar{{spam}}", param.value >= "bar{{spam}}") - self.assertEquals(bool(param), bool(param.value)) - self.assertEquals(len(param), len(param.value)) - self.assertEquals(list(param), list(param.value)) - self.assertEquals(param[2], param.value[2]) - self.assertEquals(list(reversed(param)), - list(reversed(param.value))) - self.assertIs("bar" in param, "bar" in param.value) - self.assertEquals(param + "test", param.value + "test") - self.assertEquals("test" + param, "test" + param.value) - # add param - # add template left - # add template right - - 
self.assertEquals(param * 3, Parameter(param.name, param.value * 3, - param.templates * 3)) - self.assertEquals(3 * param, Parameter(param.name, 3 * param.value, - 3 * param.templates)) - - # add param inplace - # add template implace - # add str inplace - # multiply int inplace - self.assertIsInstance(param, Parameter) - self.assertIsInstance(param.value, str) - -if __name__ == "__main__": - unittest.main(verbosity=2) diff --git a/tests/test_parser.py b/tests/test_parser.py deleted file mode 100644 index 0c989b8..0000000 --- a/tests/test_parser.py +++ /dev/null @@ -1,63 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2012 Ben Kurtovic -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
- -import unittest - -from mwparserfromhell.parameter import Parameter -from mwparserfromhell.parser import Parser -from mwparserfromhell.template import Template - -TESTS = [ - ("", []), - ("abcdef ghijhk", []), - ("abc{this is not a template}def", []), - ("neither is {{this one}nor} {this one {despite}} containing braces", []), - ("this is an acceptable {{template}}", [Template("template")]), - ("{{multiple}}{{templates}}", [Template("multiple"), - Template("templates")]), - ("multiple {{-}} templates {{+}}!", [Template("-"), Template("+")]), - ("{{{no templates here}}}", []), - ("{ {{templates here}}}", [Template("templates here")]), - ("{{{{I do not exist}}}}", []), - ("{{foo|bar|baz|eggs=spam}}", - [Template("foo", [Parameter("1", "bar"), Parameter("2", "baz"), - Parameter("eggs", "spam")])]), - ("{{abc def|ghi|jk=lmno|pqr|st=uv|wx|yz}}", - [Template("abc def", [Parameter("1", "ghi"), Parameter("jk", "lmno"), - Parameter("2", "pqr"), Parameter("st", "uv"), - Parameter("3", "wx"), Parameter("4", "yz")])]), - ("{{this has a|{{template}}|inside of it}}", - [Template("this has a", [Parameter("1", "{{template}}", - [Template("template")]), - Parameter("2", "inside of it")])]), - ("{{{{I exist}} }}", [Template("I exist", [] )]), - ("{{}}") -] - -class TestParser(unittest.TestCase): - def test_parse(self): - parser = Parser() - for unparsed, parsed in TESTS: - self.assertEqual(parser.parse(unparsed), parsed) - -if __name__ == "__main__": - unittest.main(verbosity=2) diff --git a/tests/test_template.py b/tests/test_template.py deleted file mode 100644 index b006033..0000000 --- a/tests/test_template.py +++ /dev/null @@ -1,106 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2012 Ben Kurtovic -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, 
publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -from itertools import permutations -import unittest - -from mwparserfromhell.parameter import Parameter -from mwparserfromhell.template import Template - -class TestTemplate(unittest.TestCase): - def setUp(self): - self.name = "foo" - self.bar = Parameter("1", "bar") - self.baz = Parameter("2", "baz") - self.eggs = Parameter("eggs", "spam") - self.params = [self.bar, self.baz, self.eggs] - - def test_construct(self): - Template(self.name) - Template(self.name, self.params) - Template(name=self.name) - Template(name=self.name, params=self.params) - - def test_name(self): - templates = [ - Template(self.name), - Template(self.name, self.params), - Template(name=self.name), - Template(name=self.name, params=self.params) - ] - for template in templates: - self.assertEqual(template.name, self.name) - - def test_params(self): - for template in (Template(self.name), Template(name=self.name)): - self.assertEqual(template.params, []) - for template in (Template(self.name, self.params), - Template(name=self.name, params=self.params)): - self.assertEqual(template.params, self.params) - - def test_getitem(self): - template = Template(name=self.name, params=self.params) - 
self.assertIs(template[0], self.bar) - self.assertIs(template[1], self.baz) - self.assertIs(template[2], self.eggs) - self.assertIs(template["1"], self.bar) - self.assertIs(template["2"], self.baz) - self.assertIs(template["eggs"], self.eggs) - - def test_render(self): - tests = [ - (Template(self.name), "{{foo}}"), - (Template(self.name, self.params), "{{foo|bar|baz|eggs=spam}}") - ] - for template, rendered in tests: - self.assertEqual(template.render(), rendered) - - def test_repr(self): - correct1= 'Template(name=foo, params={})' - correct2 = 'Template(name=foo, params={"1": "bar", "2": "baz", "eggs": "spam"})' - tests = [(Template(self.name), correct1), - (Template(self.name, self.params), correct2)] - for template, correct in tests: - self.assertEqual(repr(template), correct) - self.assertEqual(str(template), correct) - - def test_cmp(self): - tmp1 = Template(self.name) - tmp2 = Template(name=self.name) - tmp3 = Template(self.name, []) - tmp4 = Template(name=self.name, params=[]) - tmp5 = Template(self.name, self.params) - tmp6 = Template(name=self.name, params=self.params) - - for tmpA, tmpB in permutations((tmp1, tmp2, tmp3, tmp4), 2): - self.assertEqual(tmpA, tmpB) - - for tmpA, tmpB in permutations((tmp5, tmp6), 2): - self.assertEqual(tmpA, tmpB) - - for tmpA in (tmp5, tmp6): - for tmpB in (tmp1, tmp2, tmp3, tmp4): - self.assertNotEqual(tmpA, tmpB) - self.assertNotEqual(tmpB, tmpA) - -if __name__ == "__main__": - unittest.main(verbosity=2) From fb7567d6d0e7974beac39780c741fba3e50693b9 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 2 Feb 2013 23:44:15 -0500 Subject: [PATCH 008/115] Some empty testcases. 
--- .gitignore | 1 + tests/_test_tokenizer.py | 28 ++++++++++++++++++++++++++++ tests/test_builder.py | 29 +++++++++++++++++++++++++++++ tests/test_ctokenizer.py | 34 ++++++++++++++++++++++++++++++++++ tests/test_parser.py | 29 +++++++++++++++++++++++++++++ tests/test_pytokenizer.py | 34 ++++++++++++++++++++++++++++++++++ tests/test_tokens.py | 29 +++++++++++++++++++++++++++++ 7 files changed, 184 insertions(+) create mode 100644 tests/_test_tokenizer.py create mode 100644 tests/test_builder.py create mode 100644 tests/test_ctokenizer.py create mode 100644 tests/test_parser.py create mode 100644 tests/test_pytokenizer.py create mode 100644 tests/test_tokens.py diff --git a/.gitignore b/.gitignore index d70b37d..ec4e8ca 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ *.pyc +*.so *.egg *.egg-info .DS_Store diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py new file mode 100644 index 0000000..29f4e37 --- /dev/null +++ b/tests/_test_tokenizer.py @@ -0,0 +1,28 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +class TestTokenizer(): + def tokenize(self, text): + return self.tokenizer().tokenize(text) + + def test_basic(self): + self.assertEqual(1, 1) diff --git a/tests/test_builder.py b/tests/test_builder.py new file mode 100644 index 0000000..e38e683 --- /dev/null +++ b/tests/test_builder.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +import unittest + +class TestBuilder(unittest.TestCase): + pass + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py new file mode 100644 index 0000000..e5a7aef --- /dev/null +++ b/tests/test_ctokenizer.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +import unittest + +from _test_tokenizer import TestTokenizer + +class TestCTokenizer(unittest.TestCase, TestTokenizer): + @classmethod + def setUpClass(cls): + from mwparserfromhell.parser._tokenizer import CTokenizer + cls.tokenizer = CTokenizer + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_parser.py b/tests/test_parser.py new file mode 100644 index 0000000..3f9b2e6 --- /dev/null +++ b/tests/test_parser.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +import unittest + +class TestParser(unittest.TestCase): + pass + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py new file mode 100644 index 0000000..01855f7 --- /dev/null +++ b/tests/test_pytokenizer.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +import unittest + +from _test_tokenizer import TestTokenizer + +class TestPyTokenizer(unittest.TestCase, TestTokenizer): + @classmethod + def setUpClass(cls): + from mwparserfromhell.parser.tokenizer import Tokenizer + cls.tokenizer = Tokenizer + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_tokens.py b/tests/test_tokens.py new file mode 100644 index 0000000..0e7f87b --- /dev/null +++ b/tests/test_tokens.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import unittest + +class TestTokens(unittest.TestCase): + pass + +if __name__ == "__main__": + unittest.main(verbosity=2) From 4636fbeb4a46e76b5d04a9c439758ed042eea7eb Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 3 Feb 2013 02:10:36 -0500 Subject: [PATCH 009/115] Built an infrastructure for loading and running tokenizer tests. 
--- tests/_test_tokenizer.py | 74 +++++++++++++++++++++++++++++++++++++++++++---- tests/test_ctokenizer.py | 4 +-- tests/test_pytokenizer.py | 4 +-- tests/tokenizer/text.test | 11 +++++++ 4 files changed, 84 insertions(+), 9 deletions(-) create mode 100644 tests/tokenizer/text.test diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 29f4e37..1efafd9 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -20,9 +20,73 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -class TestTokenizer(): - def tokenize(self, text): - return self.tokenizer().tokenize(text) +from __future__ import print_function, unicode_literals +from os import listdir, path - def test_basic(self): - self.assertEqual(1, 1) +from mwparserfromhell.parser import tokens + +class _TestParseError(Exception): + """Raised internally when a test could not be parsed.""" + pass + + +class TokenizerTestCase(object): + @classmethod + def _build_test_method(cls, funcname, data): + def inner(self): + actual = self.tokenizer().tokenize(data["input"]) + self.assertEqual(actual, data["output"]) + inner.__name__ = funcname.encode("utf8") + inner.__doc__ = data["label"] + return inner + + @classmethod + def _load_tests(cls, filename, text): + tests = text.split("\n---\n") + for test in tests: + data = {"name": "", "label": "", "input": "", "output": []} + try: + for line in test.strip().splitlines(): + if line.startswith("name:"): + data["name"] = line[len("name:"):].strip() + elif line.startswith("label:"): + data["label"] = line[len("label:"):].strip() + elif line.startswith("input:"): + raw = line[len("input:"):].strip() + if raw[0] == '"' and raw[-1] == '"': + raw = raw[1:-1] + data["input"] = raw.decode("unicode_escape") + elif line.startswith("output:"): + raw = line[len("output:"):].strip() + data["output"] = eval(raw, vars(tokens)) + except _TestParseError: + if data["name"]: + error = "Could not parse test {0} in {1}" + 
print(error.format(data["name"], filename)) + else: + print("Could not parse a test in {0}".format(filename)) + continue + if not data["name"]: + error = "A test in {0} was ignored because it lacked a name" + print(error.format(filename)) + continue + if not data["input"] or not data["output"]: + error = "Test {0} in {1} was ignored because it lacked an input or an output" + print(error.format(data["name"], filename)) + continue + funcname = "test_" + filename + "_" + data["name"] + meth = cls._build_test_method(funcname, data) + setattr(cls, funcname, meth) + + @classmethod + def build(cls): + directory = path.join(path.dirname(__file__), "tokenizer") + extension = ".test" + for filename in listdir(directory): + if not filename.endswith(extension): + continue + with open(path.join(directory, filename), "r") as fp: + text = fp.read().decode("utf8") + cls._load_tests(filename[:0-len(extension)], text) + +TokenizerTestCase.build() diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index e5a7aef..7d3ffd7 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -22,9 +22,9 @@ import unittest -from _test_tokenizer import TestTokenizer +from _test_tokenizer import TokenizerTestCase -class TestCTokenizer(unittest.TestCase, TestTokenizer): +class TestCTokenizer(TokenizerTestCase, unittest.TestCase): @classmethod def setUpClass(cls): from mwparserfromhell.parser._tokenizer import CTokenizer diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index 01855f7..f739726 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -22,9 +22,9 @@ import unittest -from _test_tokenizer import TestTokenizer +from _test_tokenizer import TokenizerTestCase -class TestPyTokenizer(unittest.TestCase, TestTokenizer): +class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): @classmethod def setUpClass(cls): from mwparserfromhell.parser.tokenizer import Tokenizer diff --git a/tests/tokenizer/text.test b/tests/tokenizer/text.test new 
file mode 100644 index 0000000..8d97412 --- /dev/null +++ b/tests/tokenizer/text.test @@ -0,0 +1,11 @@ +name: basic +label: sanity check for basic text parsing, no gimmicks +input: "foobar" +output: [Text(text="foobar")] + +--- + +name: basic2 +label: slightly more complex text parsing, with newlines +input: "This is a line of text.\nThis is another line of text." +output: [Text(text="This is a line of text.\nThis is another line of text.")] From 357b6dc4470f724eac6a19bef54b27761e6a492f Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 3 Feb 2013 02:33:31 -0500 Subject: [PATCH 010/115] Make unit tests work in Python 3; add a unicode text test. --- tests/_test_tokenizer.py | 18 ++++++++++++------ tests/tokenizer/text.test | 13 ++++++++++--- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 1efafd9..98d9434 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -23,6 +23,7 @@ from __future__ import print_function, unicode_literals from os import listdir, path +from mwparserfromhell.compat import py3k from mwparserfromhell.parser import tokens class _TestParseError(Exception): @@ -36,12 +37,14 @@ class TokenizerTestCase(object): def inner(self): actual = self.tokenizer().tokenize(data["input"]) self.assertEqual(actual, data["output"]) - inner.__name__ = funcname.encode("utf8") + if not py3k: + inner.__name__ = funcname.encode("utf8") inner.__doc__ = data["label"] return inner @classmethod def _load_tests(cls, filename, text): + counter = 1 tests = text.split("\n---\n") for test in tests: data = {"name": "", "label": "", "input": "", "output": []} @@ -55,7 +58,7 @@ class TokenizerTestCase(object): raw = line[len("input:"):].strip() if raw[0] == '"' and raw[-1] == '"': raw = raw[1:-1] - data["input"] = raw.decode("unicode_escape") + data["input"] = raw.encode("raw_unicode_escape").decode("unicode_escape") elif line.startswith("output:"): raw = line[len("output:"):].strip() 
data["output"] = eval(raw, vars(tokens)) @@ -74,9 +77,10 @@ class TokenizerTestCase(object): error = "Test {0} in {1} was ignored because it lacked an input or an output" print(error.format(data["name"], filename)) continue - funcname = "test_" + filename + "_" + data["name"] - meth = cls._build_test_method(funcname, data) - setattr(cls, funcname, meth) + fname = "test_{0}{1}_{2}".format(filename, counter, data["name"]) + meth = cls._build_test_method(fname, data) + setattr(cls, fname, meth) + counter += 1 @classmethod def build(cls): @@ -86,7 +90,9 @@ class TokenizerTestCase(object): if not filename.endswith(extension): continue with open(path.join(directory, filename), "r") as fp: - text = fp.read().decode("utf8") + text = fp.read() + if not py3k: + text = text.decode("utf8") cls._load_tests(filename[:0-len(extension)], text) TokenizerTestCase.build() diff --git a/tests/tokenizer/text.test b/tests/tokenizer/text.test index 8d97412..eb5b9b4 100644 --- a/tests/tokenizer/text.test +++ b/tests/tokenizer/text.test @@ -5,7 +5,14 @@ output: [Text(text="foobar")] --- -name: basic2 +name: newlines label: slightly more complex text parsing, with newlines -input: "This is a line of text.\nThis is another line of text." -output: [Text(text="This is a line of text.\nThis is another line of text.")] +input: "This is a line of text.\nThis is another line of text.\nThis is another." +output: [Text(text="This is a line of text.\nThis is another line of text.\nThis is another.")] + +--- + +name: unicode +label: ensure unicode data is handled properly +input: "Thís ís å sëñtënce with diœcritiçs." +output: [Text(text="Thís ís å sëñtënce with diœcritiçs.")] From ecfb2c628f742c7c703fe67e8a0f7b5a51d62570 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 3 Feb 2013 14:16:17 -0500 Subject: [PATCH 011/115] Another test; handle errors when reading output line better. 
--- tests/_test_tokenizer.py | 16 ++++++++++------ tests/tokenizer/text.test | 7 +++++++ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 98d9434..bafb593 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -58,23 +58,27 @@ class TokenizerTestCase(object): raw = line[len("input:"):].strip() if raw[0] == '"' and raw[-1] == '"': raw = raw[1:-1] - data["input"] = raw.encode("raw_unicode_escape").decode("unicode_escape") + raw = raw.encode("raw_unicode_escape") + data["input"] = raw.decode("unicode_escape") elif line.startswith("output:"): raw = line[len("output:"):].strip() - data["output"] = eval(raw, vars(tokens)) + try: + data["output"] = eval(raw, vars(tokens)) + except Exception: + raise _TestParseError() except _TestParseError: if data["name"]: - error = "Could not parse test {0} in {1}" + error = "Could not parse test '{0}' in '{1}'" print(error.format(data["name"], filename)) else: - print("Could not parse a test in {0}".format(filename)) + print("Could not parse a test in '{0}'".format(filename)) continue if not data["name"]: - error = "A test in {0} was ignored because it lacked a name" + error = "A test in '{0}' was ignored because it lacked a name" print(error.format(filename)) continue if not data["input"] or not data["output"]: - error = "Test {0} in {1} was ignored because it lacked an input or an output" + error = "Test '{0}'' in '{1}' was ignored because it lacked an input or an output" print(error.format(data["name"], filename)) continue fname = "test_{0}{1}_{2}".format(filename, counter, data["name"]) diff --git a/tests/tokenizer/text.test b/tests/tokenizer/text.test index eb5b9b4..77d5f50 100644 --- a/tests/tokenizer/text.test +++ b/tests/tokenizer/text.test @@ -16,3 +16,10 @@ name: unicode label: ensure unicode data is handled properly input: "Thís ís å sëñtënce with diœcritiçs." 
output: [Text(text="Thís ís å sëñtënce with diœcritiçs.")] + +--- + +name: unicode2 +label: additional unicode check for non-BMP codepoints +input: "𐌲𐌿𐍄𐌰𐍂𐌰𐌶𐌳𐌰" +output: [Text(text="𐌲𐌿𐍄𐌰𐍂𐌰𐌶𐌳𐌰")] From eb1bd6b281ffe5e193825da4f36cdf1cf8b49767 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 3 Feb 2013 14:38:34 -0500 Subject: [PATCH 012/115] Add some basic tests for templates; adjust error messages again. --- tests/_test_tokenizer.py | 13 +++++++------ tests/tokenizer/templates.test | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 6 deletions(-) create mode 100644 tests/tokenizer/templates.test diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index bafb593..2571692 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -64,14 +64,15 @@ class TokenizerTestCase(object): raw = line[len("output:"):].strip() try: data["output"] = eval(raw, vars(tokens)) - except Exception: - raise _TestParseError() - except _TestParseError: + except Exception as err: + raise _TestParseError(err) + except _TestParseError as err: if data["name"]: - error = "Could not parse test '{0}' in '{1}'" - print(error.format(data["name"], filename)) + error = "Could not parse test '{0}' in '{1}':\n\t{2}" + print(error.format(data["name"], filename, err)) else: - print("Could not parse a test in '{0}'".format(filename)) + error = "Could not parse a test in '{0}':\n\t{1}" + print(error.format(filename, err)) continue if not data["name"]: error = "A test in '{0}' was ignored because it lacked a name" diff --git a/tests/tokenizer/templates.test b/tests/tokenizer/templates.test new file mode 100644 index 0000000..23ac38f --- /dev/null +++ b/tests/tokenizer/templates.test @@ -0,0 +1,32 @@ +name: no_params +label: simplest type of template +input: "{{template}}" +output: [TemplateOpen(), Text(text="template"), TemplateClose()] + +--- + +name: one_param_unnamed +label: basic template with one unnamed parameter +input: "{{foo|bar}}" +output: 
[TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateClose()] + +--- + +name: one_param_named +label: basic template with one named parameter +input: "{{foo|bar=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] + +--- + +name: multiple_unnamed_params +label: basic template with multiple unnamed parameters +input: "{{foo|bar|baz|biz|buzz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamSeparator(), Text(text="buzz"), TemplateClose()] + +--- + +name: multiple_named_params +label: basic template with multiple named parameters +input: "{{foo|bar=baz|biz=buzz|buff=baff|usr=bin}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamEquals(), Text(text="buzz"), TemplateParamSeparator(), Text(text="buff"), TemplateParamEquals(), Text(text="baff"), TemplateParamSeparator(), Text(text="usr"), TemplateParamEquals(), Text(text="bin"), TemplateClose()] From 713b83a4d94e05bf907158aa6a5d98f7132d998c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 3 Feb 2013 17:41:55 -0500 Subject: [PATCH 013/115] Added a metric ton of template tests; adjustments; docstrings. 
--- README.rst | 3 +- tests/_test_tokenizer.py | 22 +++- tests/test_ctokenizer.py | 1 + tests/test_docs.py | 6 + tests/test_pytokenizer.py | 1 + tests/tokenizer/templates.test | 285 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 314 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index 3901103..90e896f 100644 --- a/README.rst +++ b/README.rst @@ -18,7 +18,8 @@ so you can install the latest release with ``pip install mwparserfromhell`` cd mwparserfromhell python setup.py install -You can run the comprehensive unit testing suite with ``python setup.py test``. +You can run the comprehensive unit testing suite with +``python setup.py test -q``. Usage ----- diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 2571692..bef7569 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -32,8 +32,20 @@ class _TestParseError(Exception): class TokenizerTestCase(object): + """A base test case for tokenizers, whose tests are loaded dynamically. + + Subclassed along with unittest.TestCase to form TestPyTokenizer and + TestCTokenizer. Tests are loaded dynamically from files in the 'tokenizer' + directory. + """ @classmethod def _build_test_method(cls, funcname, data): + """Create and return a method to be treated as a test case method. + + *data* is a dict containing multiple keys: the *input* text to be + tokenized, the expected list of tokens as *output*, and an optional + *label* for the method's docstring. 
+ """ def inner(self): actual = self.tokenizer().tokenize(data["input"]) self.assertEqual(actual, data["output"]) @@ -44,8 +56,10 @@ class TokenizerTestCase(object): @classmethod def _load_tests(cls, filename, text): - counter = 1 + """Load all tests in *text* from the file *filename*.""" tests = text.split("\n---\n") + counter = 1 + digits = len(str(len(tests))) for test in tests: data = {"name": "", "label": "", "input": "", "output": []} try: @@ -79,16 +93,18 @@ class TokenizerTestCase(object): print(error.format(filename)) continue if not data["input"] or not data["output"]: - error = "Test '{0}'' in '{1}' was ignored because it lacked an input or an output" + error = "Test '{0}' in '{1}' was ignored because it lacked an input or an output" print(error.format(data["name"], filename)) continue - fname = "test_{0}{1}_{2}".format(filename, counter, data["name"]) + number = str(counter).zfill(digits) + fname = "test_{0}{1}_{2}".format(filename, number, data["name"]) meth = cls._build_test_method(fname, data) setattr(cls, fname, meth) counter += 1 @classmethod def build(cls): + """Load and install all tests from the 'tokenizer' directory.""" directory = path.join(path.dirname(__file__), "tokenizer") extension = ".test" for filename in listdir(directory): diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index 7d3ffd7..86f4787 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -25,6 +25,7 @@ import unittest from _test_tokenizer import TokenizerTestCase class TestCTokenizer(TokenizerTestCase, unittest.TestCase): + """Test cases for the C tokenizer.""" @classmethod def setUpClass(cls): from mwparserfromhell.parser._tokenizer import CTokenizer diff --git a/tests/test_docs.py b/tests/test_docs.py index 5ec25e1..d99652f 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -29,6 +29,7 @@ import mwparserfromhell from mwparserfromhell.compat import py3k, str, StringIO class TestDocs(unittest.TestCase): + """Integration test cases 
for mwparserfromhell's documentation.""" def assertPrint(self, input, output): """Assertion check that *input*, when printed, produces *output*.""" buff = StringIO() @@ -37,6 +38,7 @@ class TestDocs(unittest.TestCase): self.assertEqual(buff.read(), output) def test_readme_1(self): + """test a block of example code in the README""" text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?" wikicode = mwparserfromhell.parse(text) self.assertPrint(wikicode, @@ -56,6 +58,7 @@ class TestDocs(unittest.TestCase): self.assertPrint(template.get("eggs").value, "spam") def test_readme_2(self): + """test a block of example code in the README""" code = mwparserfromhell.parse("{{foo|this {{includes a|template}}}}") if py3k: self.assertPrint(code.filter_templates(), @@ -71,6 +74,7 @@ class TestDocs(unittest.TestCase): "template") def test_readme_3(self): + """test a block of example code in the README""" text = "{{foo|{{bar}}={{baz|{{spam}}}}}}" temps = mwparserfromhell.parse(text).filter_templates(recursive=True) if py3k: @@ -80,6 +84,7 @@ class TestDocs(unittest.TestCase): self.assertPrint(temps, res) def test_readme_4(self): + """test a block of example code in the README""" text = "{{cleanup}} '''Foo''' is a [[bar]]. 
{{uncategorized}}" code = mwparserfromhell.parse(text) for template in code.filter_templates(): @@ -101,6 +106,7 @@ class TestDocs(unittest.TestCase): self.assertEqual(text, code) def test_readme_5(self): + """test a block of example code in the README; includes a web call""" url1 = "http://en.wikipedia.org/w/api.php" url2 = "http://en.wikipedia.org/w/index.php?title={0}&action=raw" title = "Test" diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index f739726..4254748 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -25,6 +25,7 @@ import unittest from _test_tokenizer import TokenizerTestCase class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): + """Test cases for the Python tokenizer.""" @classmethod def setUpClass(cls): from mwparserfromhell.parser.tokenizer import Tokenizer diff --git a/tests/tokenizer/templates.test b/tests/tokenizer/templates.test index 23ac38f..7399022 100644 --- a/tests/tokenizer/templates.test +++ b/tests/tokenizer/templates.test @@ -30,3 +30,288 @@ name: multiple_named_params label: basic template with multiple named parameters input: "{{foo|bar=baz|biz=buzz|buff=baff|usr=bin}}" output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamEquals(), Text(text="buzz"), TemplateParamSeparator(), Text(text="buff"), TemplateParamEquals(), Text(text="baff"), TemplateParamSeparator(), Text(text="usr"), TemplateParamEquals(), Text(text="bin"), TemplateClose()] + +--- + +name: multiple_mixed_params +label: basic template with multiple unnamed/named parameters +input: "{{foo|bar=baz|biz|buzz=buff|usr|bin}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamSeparator(), Text(text="buzz"), TemplateParamEquals(), Text(text="buff"), 
TemplateParamSeparator(), Text(text="usr"), TemplateParamSeparator(), Text(text="bin"), TemplateClose()] + +--- + +name: multiple_mixed_params2 +label: basic template with multiple unnamed/named parameters in another order +input: "{{foo|bar|baz|biz=buzz|buff=baff|usr=bin}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamEquals(), Text(text="buzz"), TemplateParamSeparator(), Text(text="buff"), TemplateParamEquals(), Text(text="baff"), TemplateParamSeparator(), Text(text="usr"), TemplateParamEquals(), Text(text="bin"), TemplateClose()] + +--- + +name: nested_unnamed_param +label: nested template as an unnamed parameter +input: "{{foo|{{bar}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateClose()] + +--- + +name: nested_named_param_value +label: nested template as a parameter value with a named parameter +input: "{{foo|bar={{baz}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar"), TemplateParamEquals(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_named_param_name_and_value +label: nested templates as a parameter name and value +input: "{{foo|{{bar}}={{baz}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_start +label: nested template at the beginning of a template name +input: "{{{{foo}}bar}}" +output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose()] + +--- + +name: nested_name_start_unnamed_param +label: nested template at the beginning of a template name and as an unnamed parameter +input: 
"{{{{foo}}bar|{{baz}}}}" +output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_start_named_param_value +label: nested template at the beginning of a template name and as a parameter value with a named parameter +input: "{{{{foo}}bar|baz={{biz}}}}" +output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_start_named_param_name_and_value +label: nested template at the beginning of a template name and as a parameter name and value +input: "{{{{foo}}bar|{{baz}}={{biz}}}}" +output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_end +label: nested template at the end of a template name +input: "{{foo{{bar}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_end_unnamed_param +label: nested template at the end of a template name and as an unnamed parameter +input: "{{foo{{bar}}|{{baz}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_end_named_param_value +label: nested template at the end of a template name and as a parameter value with a named parameter +input: "{{foo{{bar}}|baz={{biz}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), 
Text(text="baz"), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_end_named_param_name_and_value +label: nested template at the end of a template name and as a parameter name and value +input: "{{foo{{bar}}|{{baz}}={{biz}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_mid +label: nested template in the middle of a template name +input: "{{foo{{bar}}baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose()] + +--- + +name: nested_name_mid_unnamed_param +label: nested template in the middle of a template name and as an unnamed parameter +input: "{{foo{{bar}}baz|{{biz}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateParamSeparator(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_mid_named_param_value +label: nested template in the middle of a template name and as a parameter value with a named parameter +input: "{{foo{{bar}}baz|biz={{buzz}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateParamSeparator(), Text(text="biz"), TemplateParamEquals(), TemplateOpen(), Text(text="buzz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_mid_named_param_name_and_value +label: nested template in the middle of a template name and as a parameter name and value +input: "{{foo{{bar}}baz|{{biz}}={{buzz}}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateOpen(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateParamSeparator(), TemplateOpen(), Text(text="biz"), 
TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="buzz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_start_end +label: nested template at the beginning and end of a template name +input: "{{{{foo}}{{bar}}}}" +output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_start_end_unnamed_param +label: nested template at the beginning and end of a template name and as an unnamed parameter +input: "{{{{foo}}{{bar}}|{{baz}}}}" +output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_start_end_named_param_value +label: nested template at the beginning and end of a template name and as a parameter value with a named parameter +input: "{{{{foo}}{{bar}}|baz={{biz}}}}" +output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_name_start_end_named_param_name_and_value +label: nested template at the beginning and end of a template name and as a parameter name and value +input: "{{{{foo}}{{bar}}|{{baz}}={{biz}}}}" +output: [TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), TemplateOpen(), Text(text="bar"), TemplateClose(), TemplateParamSeparator(), TemplateOpen(), Text(text="baz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="biz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_names_multiple +label: multiple nested templates within nested templates +input: "{{{{{{{{foo}}bar}}baz}}biz}}" +output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), 
Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose(), Text(text="biz"), TemplateClose()] + +--- + +name: nested_names_multiple_unnamed_param +label: multiple nested templates within nested templates with a nested unnamed parameter +input: "{{{{{{{{foo}}bar}}baz}}biz|{{buzz}}}}" +output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose(), Text(text="biz"), TemplateParamSeparator(), TemplateOpen(), Text(text="buzz"), TemplateClose(), TemplateClose()] + +--- + +name: nested_names_multiple_named_param_value +label: multiple nested templates within nested templates with a nested parameter value in a named parameter +input: "{{{{{{{{foo}}bar}}baz}}biz|buzz={{bin}}}}" +output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose(), Text(text="biz"), TemplateParamSeparator(), Text(text="buzz"), TemplateParamEquals(), TemplateOpen(), Text(text="bin"), TemplateClose(), TemplateClose()] + +--- + +name: nested_names_multiple_named_param_name_and_value +label: multiple nested templates within nested templates with a nested parameter name and value +input: "{{{{{{{{foo}}bar}}baz}}biz|{{buzz}}={{bin}}}}" +output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateClose(), Text(text="baz"), TemplateClose(), Text(text="biz"), TemplateParamSeparator(), TemplateOpen(), Text(text="buzz"), TemplateClose(), TemplateParamEquals(), TemplateOpen(), Text(text="bin"), TemplateClose(), TemplateClose()] + +--- + +name: incomplete_tests + +"{{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}}" + +"{{\nfoobar}}" +"{{foobar\n}}" +"{{\nfoobar\n}}" +"{{foo\nbar}}" +"{{\nfoo\nbar}}" +"{{foo\nbar\n}}" +"{{\nfoo\nbar\n}}" + +"{{foo|\nbar}}" +"{{foo|bar\n}}" 
+"{{foo|\nbar\n}}" +"{{foo|\nb\nar}}" +"{{foo|b\nar\n}}" +"{{foo|\nb\nar\n}}" +"{{\nfoo|\nbar}}" +"{{\nfoo|bar\n}}" +"{{\nfoo|\nbar\n}}" +"{{\nfoo|\nb\nar}}" +"{{\nfoo|b\nar\n}}" +"{{\nfoo|\nb\nar\n}}" +"{{foo\n|\nbar}}" +"{{foo\n|bar\n}}" +"{{foo\n|\nbar\n}}" +"{{foo\n|\nb\nar}}" +"{{foo\n|b\nar\n}}" +"{{foo\n|\nb\nar\n}}" +"{{\nfoo\n|\nbar}}" +"{{\nfoo\n|bar\n}}" +"{{\nfoo\n|\nbar\n}}" +"{{\nfoo\n|\nb\nar}}" +"{{\nfoo\n|b\nar\n}}" +"{{\nfoo\n|\nb\nar\n}}" +"{{f\noo|\nbar}}" +"{{\nf\noo|\nbar}}" +"{{f\noo\n|\nbar}}" +"{{\nf\noo\n|\nbar}}" + +"{{foo|1=\nbar}}" +"{{foo|1=bar\n}}" +"{{foo|1=\nbar\n}}" +"{{foo|1=\nb\nar}}" +"{{foo|1=b\nar\n}}" +"{{foo|1=\nb\nar\n}}" +"{{foo|\nbar=baz}}" +"{{foo|bar\n=baz}}" +"{{foo|\nbar\n=baz}}" +"{{foo|\nb\nar=baz}}" +"{{foo|b\nar\n=baz}}" +"{{foo|\nb\nar\n=baz}}" +"{{foo|\nbar=baz\n}}" +"{{foo|bar\n=baz\n}}" +"{{foo|\nbar\n=baz\n}}" +"{{foo|\nb\nar=baz\n}}" +"{{foo|b\nar\n=baz\n}}" +"{{foo|\nb\nar\n=baz\n}}" +"{{foo|\nbar=\nbaz}}" +"{{foo|bar\n=\nbaz}}" +"{{foo|\nbar\n=\nbaz}}" +"{{foo|\nb\nar=\nbaz}}" +"{{foo|b\nar\n=\nbaz}}" +"{{foo|\nb\nar\n=\nbaz}}" +"{{foo|\nbar=\nbaz\n}}" +"{{foo|bar\n=\nbaz\n}}" +"{{foo|\nbar\n=\nbaz\n}}" +"{{foo|\nb\nar=\nbaz\n}}" +"{{foo|b\nar\n=\nbaz\n}}" +"{{foo|\nb\nar\n=\nbaz\n}}" +"{{foo|\nbar=ba\nz}}" +"{{foo|bar\n=ba\nz}}" +"{{foo|\nbar\n=ba\nz}}" +"{{foo|\nb\nar=ba\nz}}" +"{{foo|b\nar\n=ba\nz}}" +"{{foo|\nb\nar\n=ba\nz}}" + +"{{\nfoo\n|\nbar\n=\nb\naz\n|\nb\nuz\n}}" +"{{\nfoo\n|\nb\nar\n|\nbaz\n=\nb\nuz\n}}" + +"{{\nfoo\n|\n{{\nbar\n|\nbaz\n=\nb\niz\n}}\n=\nb\nuzz\n}}" + +"{{foo{bar}}" +"{{foo}bar}}" +"{{{foobar}}" +"{{foo{b{ar}}" +"{{foo[bar}}" +"{{foo]bar}}" +"{{[foobar}}" +"{{foobar]}}" + +"{{foobar" +"{{foobar}" +"{{foobar|" +"{{foo|bar" +"{{foo|bar|" +"{{foo|bar=" +"{{foo|bar=|" +"{{foo|bar=baz" +"{{foo|bar=baz|" +"{{foo|bar|baz" +"{{foo|bar|baz=" +"{{foo|bar|baz=biz" +"{{foo|bar=baz|biz" +"{{foo|bar=baz|biz=" +"{{foo|bar=baz|biz=buzz" From d500f8972e8a3ae0bfee706d40b76b3bfa1fc00d Mon Sep 17 
00:00:00 2001 From: Ben Kurtovic Date: Sat, 16 Feb 2013 13:01:41 -0500 Subject: [PATCH 014/115] Add a few more tests; use assert*(expected, actual) instead of opposite. --- tests/_test_tokenizer.py | 7 +++-- tests/test_docs.py | 6 ++-- tests/tokenizer/templates.test | 68 ++++++++++++++++++++++++++++++++++++------ 3 files changed, 66 insertions(+), 15 deletions(-) diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index bef7569..114b835 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -47,8 +47,9 @@ class TokenizerTestCase(object): *label* for the method's docstring. """ def inner(self): + expected = data["output"] actual = self.tokenizer().tokenize(data["input"]) - self.assertEqual(actual, data["output"]) + self.assertEqual(expected, actual) if not py3k: inner.__name__ = funcname.encode("utf8") inner.__doc__ = data["label"] @@ -61,7 +62,7 @@ class TokenizerTestCase(object): counter = 1 digits = len(str(len(tests))) for test in tests: - data = {"name": "", "label": "", "input": "", "output": []} + data = {"name": None, "label": None, "input": None, "output": None} try: for line in test.strip().splitlines(): if line.startswith("name:"): @@ -92,7 +93,7 @@ class TokenizerTestCase(object): error = "A test in '{0}' was ignored because it lacked a name" print(error.format(filename)) continue - if not data["input"] or not data["output"]: + if data["input"] is None or data["output"] is None: error = "Test '{0}' in '{1}' was ignored because it lacked an input or an output" print(error.format(data["name"], filename)) continue diff --git a/tests/test_docs.py b/tests/test_docs.py index d99652f..8673cb9 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -35,7 +35,7 @@ class TestDocs(unittest.TestCase): buff = StringIO() print(input, end="", file=buff) buff.seek(0) - self.assertEqual(buff.read(), output) + self.assertEqual(output, buff.read()) def test_readme_1(self): """test a block of example code in the README""" @@ -115,9 +115,9 
@@ class TestDocs(unittest.TestCase): raw = urllib.urlopen(url1, urllib.urlencode(data)).read() res = json.loads(raw) text = res["query"]["pages"].values()[0]["revisions"][0]["*"] - actual = mwparserfromhell.parse(text) expected = urllib.urlopen(url2.format(title)).read().decode("utf8") - self.assertEqual(actual, expected) + actual = mwparserfromhell.parse(text) + self.assertEqual(expected, actual) if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/tests/tokenizer/templates.test b/tests/tokenizer/templates.test index 7399022..348e1f5 100644 --- a/tests/tokenizer/templates.test +++ b/tests/tokenizer/templates.test @@ -208,17 +208,62 @@ output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(te --- -name: incomplete_tests +name: mixed_nested_templates +label: mixed assortment of nested templates within template names, parameter names, and values +input: "{{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}}" +output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), Text(text="biz"), TemplateClose(), Text(text="buzz"), TemplateClose(), Text(text="usr"), TemplateParamSeparator(), TemplateOpen(), Text(text="bin"), TemplateClose(), TemplateClose()] + +--- + +name: newline_start +label: a newline at the start of a template name +input: "{{\nfoobar}}" +output: [TemplateOpen(), Text(text="\nfoobar"), TemplateClose()] + +--- + +name: newline_end +label: a newline at the end of a template name +input: "{{foobar\n}}" +output: [TemplateOpen(), Text(text="foobar\n"), TemplateClose()] + +--- + +name: newline_start_end +label: a newline at the start and end of a template name +input: "{{\nfoobar\n}}" +output: [TemplateOpen(), Text(text="\nfoobar\n"), TemplateClose()] + +--- -"{{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}}" +name: newline_mid +label: a newline at the middle of a template name +input: "{{foo\nbar}}" 
+output: [Text(text="{{foo\nbar}}")] -"{{\nfoobar}}" -"{{foobar\n}}" -"{{\nfoobar\n}}" -"{{foo\nbar}}" -"{{\nfoo\nbar}}" -"{{foo\nbar\n}}" -"{{\nfoo\nbar\n}}" +--- + +name: newline_start_mid +label: a newline at the start and middle of a template name +input: "{{\nfoo\nbar}}" +output: [Text(text="{{\nfoo\nbar}}")] + +--- + +name: newline_mid_end +label: a newline at the middle and end of a template name +input: "{{foo\nbar\n}}" +output: [Text(text="{{foo\nbar\n}}")] + +--- + +name: newline_start_mid_end +label: a newline at the start, middle, and end of a template name +input: "{{\nfoo\nbar\n}}" +output: [Text(text="{{\nfoo\nbar\n}}")] + +--- +name: incomplete_tests "{{foo|\nbar}}" "{{foo|bar\n}}" @@ -300,6 +345,11 @@ name: incomplete_tests "{{[foobar}}" "{{foobar]}}" +"{{foo|ba{r}}" +"{{foo|ba{r}}}" +"{{foo|ba{r}=baz}}" +"{{foo|ba[r]}}" + "{{foobar" "{{foobar}" "{{foobar|" From 660a0c31e60ddde8435fb3c8c743e5f1c9f7ea77 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 16 Feb 2013 15:30:12 -0500 Subject: [PATCH 015/115] Adding a bunch more tests. 
--- tests/tokenizer/templates.test | 226 +++++++++++++++++++++++++++++++++++------ 1 file changed, 197 insertions(+), 29 deletions(-) diff --git a/tests/tokenizer/templates.test b/tests/tokenizer/templates.test index 348e1f5..9223d61 100644 --- a/tests/tokenizer/templates.test +++ b/tests/tokenizer/templates.test @@ -263,36 +263,204 @@ input: "{{\nfoo\nbar\n}}" output: [Text(text="{{\nfoo\nbar\n}}")] --- -name: incomplete_tests -"{{foo|\nbar}}" -"{{foo|bar\n}}" -"{{foo|\nbar\n}}" -"{{foo|\nb\nar}}" -"{{foo|b\nar\n}}" -"{{foo|\nb\nar\n}}" -"{{\nfoo|\nbar}}" -"{{\nfoo|bar\n}}" -"{{\nfoo|\nbar\n}}" -"{{\nfoo|\nb\nar}}" -"{{\nfoo|b\nar\n}}" -"{{\nfoo|\nb\nar\n}}" -"{{foo\n|\nbar}}" -"{{foo\n|bar\n}}" -"{{foo\n|\nbar\n}}" -"{{foo\n|\nb\nar}}" -"{{foo\n|b\nar\n}}" -"{{foo\n|\nb\nar\n}}" -"{{\nfoo\n|\nbar}}" -"{{\nfoo\n|bar\n}}" -"{{\nfoo\n|\nbar\n}}" -"{{\nfoo\n|\nb\nar}}" -"{{\nfoo\n|b\nar\n}}" -"{{\nfoo\n|\nb\nar\n}}" -"{{f\noo|\nbar}}" -"{{\nf\noo|\nbar}}" -"{{f\noo\n|\nbar}}" -"{{\nf\noo\n|\nbar}}" +name: newline_unnamed_param_start +label: a newline at the start of an unnamed template parameter +input: "{{foo|\nbar}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar"), TemplateClose()] + +--- + +name: newline_unnamed_param_end +label: a newline at the end of an unnamed template parameter +input: "{{foo|bar\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar\n"), TemplateClose()] + +--- + +name: newline_unnamed_param_start_end +label: a newline at the start and end of an unnamed template parameter +input: "{{foo|\nbar\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateClose()] + +--- + +name: newline_unnamed_param_start_mid +label: a newline at the start and middle of an unnamed template parameter +input: "{{foo|\nb\nar}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nb\nar"), TemplateClose()] + +--- + 
+name: newline_unnamed_param_mid_end +label: a newline at the middle and end of an unnamed template parameter +input: "{{foo|b\nar\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="b\nar\n"), TemplateClose()] + +--- + +name: newline_unnamed_param_start_mid_end +label: a newline at the start, middle, and end of an unnamed template parameter +input: "{{foo|\nb\nar\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateClose()] + +--- + +name: newline_start_unnamed_param_start +label: a newline at the start of a template name and at the start of an unnamed template parameter +input: "{{\nfoo|\nbar}}" +output: [TemplateOpen(), Text(text="\nfoo"), TemplateParamSeparator(), Text(text="\nbar"), TemplateClose()] + +--- + +name: newline_start_unnamed_param_end +label: a newline at the start of a template name and at the end of an unnamed template parameter +input: "{{\nfoo|bar\n}}" +output: [TemplateOpen(), Text(text="\nfoo"), TemplateParamSeparator(), Text(text="bar\n"), TemplateClose()] + +--- + +name: newline_start_unnamed_param_start_end +label: a newline at the start of a template name and at the start and end of an unnamed template parameter +input: "{{\nfoo|\nbar\n}}" +output: [TemplateOpen(), Text(text="\nfoo"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateClose()] + +--- + +name: newline_start_unnamed_param_start_mid +label: a newline at the start of a template name and at the start and middle of an unnamed template parameter +input: "{{\nfoo|\nb\nar}}" +output: [TemplateOpen(), Text(text="\nfoo"), TemplateParamSeparator(), Text(text="\nb\nar"), TemplateClose()] + +--- + +name: newline_start_unnamed_param_mid_end +label: a newline at the start of a template name and at the middle and end of an unnamed template parameter +input: "{{\nfoo|b\nar\n}}" +output: [TemplateOpen(), Text(text="\nfoo"), TemplateParamSeparator(), Text(text="b\nar\n"), TemplateClose()] + +--- + 
+name: newline_start_unnamed_param_start_mid_end +label: a newline at the start of a template name and at the start, middle, and end of an unnamed template parameter +input: "{{\nfoo|\nb\nar\n}}" +output: [TemplateOpen(), Text(text="\nfoo"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateClose()] + +--- + +name: newline_end_unnamed_param_start +label: a newline at the end of a template name and at the start of an unnamed template parameter +input: "{{foo\n|\nbar}}" +output: [TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="\nbar"), TemplateClose()] + +--- + +name: newline_end_unnamed_param_end +label: a newline at the end of a template name and at the end of an unnamed template parameter +input: "{{foo\n|bar\n}}" +output: [TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="bar\n"), TemplateClose()] + +--- + +name: newline_end_unnamed_param_start_end +label: a newline at the end of a template name and at the start and end of an unnamed template parameter +input: "{{foo\n|\nbar\n}}" +output: [TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateClose()] + +--- + +name: newline_end_unnamed_param_start_mid +label: a newline at the end of a template name and at the start and middle of an unnamed template parameter +input: "{{foo\n|\nb\nar}}" +output: [TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="\nb\nar"), TemplateClose()] + +--- + +name: newline_end_unnamed_param_mid_end +label: a newline at the end of a template name and at the middle and end of an unnamed template parameter +input: "{{foo\n|b\nar\n}}" +output: [TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="b\nar\n"), TemplateClose()] + +--- + +name: newline_end_unnamed_param_start_mid_end +label: a newline at the end of a template name and at the start, middle, and end of an unnamed template parameter +input: "{{foo\n|\nb\nar\n}}" +output: [TemplateOpen(), 
Text(text="foo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateClose()] + +--- + +name: newline_start_end_unnamed_param_start +label: a newline at the start and end of a template name and the start of an unnamed template parameter +input: "{{\nfoo\n|\nbar}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nbar"), TemplateClose()] + +--- + +name: newline_start_end_unnamed_param_end +label: a newline at the start and end of a template name and the end of an unnamed template parameter +input: "{{\nfoo\n|bar\n}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="bar\n"), TemplateClose()] + +--- + +name: newline_start_end_unnamed_param_start_end +label: a newline at the start and end of a template name and the start and end of an unnamed template parameter +input: "{{\nfoo\n|\nbar\n}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateClose()] + +--- + +name: newline_start_end_unnamed_param_start_mid +label: a newline at the start and end of a template name and the start and middle of an unnamed template parameter +input: "{{\nfoo\n|\nb\nar}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar"), TemplateClose()] + +--- + +name: newline_start_end_unnamed_param_mid_end +label: a newline at the start and end of a template name and the middle and end of an unnamed template parameter +input: "{{\nfoo\n|b\nar\n}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="b\nar\n"), TemplateClose()] + +--- + +name: newline_start_end_unnamed_param_start_mid_end +label: a newline at the start and end of a template name and the start, middle, and end of an unnamed template parameter +input: "{{\nfoo\n|\nb\nar\n}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateClose()] + +--- + +name: newline_mid_unnamed_param_start
+label: a newline at the middle of a template name and at the start of an unnamed template parameter +input: "{{f\noo|\nbar}}" +output: [Text(text="{{f\noo|\nbar}}")] + +--- + +name: newline_start_mid_unnamed_param_start +label: a newline at the start and middle of a template name and at the start of an unnamed template parameter +input: "{{\nf\noo|\nbar}}" +output: [Text(text="{{\nf\noo|\nbar}}")] + +--- + +name: newline_mid_end_unnamed_param_start +label: a newline at the middle and end of a template name and at the start of an unnamed template parameter +input: "{{f\noo\n|\nbar}}" +output: [Text(text="{{f\noo\n|\nbar}}")] + +--- + +name: newline_start_mid_end_unnamed_param_start +label: a newline at the start, middle, and end of a template name and at the start of an unnamed template parameter +input: "{{\nf\noo\n|\nbar}}" +output: [Text(text="{{\nf\noo\n|\nbar}}")] + +--- + +name: incomplete_tests "{{foo|1=\nbar}}" "{{foo|1=bar\n}}" From 556477f8015bd987167e7e0beee0e78ae02b1a47 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 17 Feb 2013 15:04:19 -0500 Subject: [PATCH 016/115] Adding a bunch more tests. 
--- tests/tokenizer/templates.test | 296 +++++++++++++++++++++++++++++++++++------ 1 file changed, 259 insertions(+), 37 deletions(-) diff --git a/tests/tokenizer/templates.test b/tests/tokenizer/templates.test index 9223d61..c3416ff 100644 --- a/tests/tokenizer/templates.test +++ b/tests/tokenizer/templates.test @@ -460,44 +460,266 @@ output: [Text(text="{{\nf\noo\n|\nbar}}")] --- -name: incomplete_tests +name: newline_named_param_value_start +label: a newline at the start of a named parameter value +input: "{{foo|1=\nbar}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="\nbar"), TemplateClose()] + +--- + +name: newline_named_param_value_end +label: a newline at the end of a named parameter value +input: "{{foo|1=bar\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="bar\n"), TemplateClose()] + +--- + +name: newline_named_param_value_start_end +label: a newline at the start and end of a named parameter value +input: "{{foo|1=\nbar\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="\nbar\n"), TemplateClose()] + +--- + +name: newline_named_param_value_start_mid +label: a newline at the start and middle of a named parameter value +input: "{{foo|1=\nb\nar}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="\nb\nar"), TemplateClose()] + +--- + +name: newline_named_param_value_mid_end +label: a newline at the middle and end of a named parameter value +input: "{{foo|1=b\nar\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="b\nar\n"), TemplateClose()] + +--- + +name: newline_named_param_value_start_mid_end +label: a newline at the start, middle, and end of a named parameter value +input: "{{foo|1=\nb\nar\n}}" 
+output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="\nb\nar\n"), TemplateClose()] + +--- + +name: newline_named_param_name_start +label: a newline at the start of a parameter name +input: "{{foo|\nbar=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] + +--- + +name: newline_named_param_name_end +label: a newline at the end of a parameter name +input: "{{foo|bar\n=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar\n"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] + +--- + +name: newline_named_param_name_start_end +label: a newline at the start and end of a parameter name +input: "{{foo|\nbar\n=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] + +--- + +name: newline_named_param_name_mid +label: a newline at the middle of a parameter name +input: "{{foo|b\nar=baz}}" +output: [Text(text="{{foo|b\nar=baz}}")] + +--- + +name: newline_named_param_name_start_mid +label: a newline at the start and middle of a parameter name +input: "{{foo|\nb\nar=baz}}" +output: [Text(text="{{foo|\nb\nar=baz}}")] + +--- + +name: newline_named_param_name_mid_end +label: a newline at the middle and end of a parameter name +input: "{{foo|b\nar\n=baz}}" +output: [Text(text="{{foo|b\nar\n=baz}}")] + +--- + +name: newline_named_param_name_start_mid_end +label: a newline at the start, middle, and end of a parameter name +input: "{{foo|\nb\nar\n=baz}}" +output: [Text(text="{{foo|\nb\nar\n=baz}}")] + +--- + +name: newline_named_param_name_start_param_value_end +label: a newline at the start of a parameter name and the end of a parameter value +input: "{{foo|\nbar=baz\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar"), 
TemplateParamEquals(), Text(text="baz\n"), TemplateClose()] + +--- + +name: newline_named_param_name_end_param_value_end +label: a newline at the end of a parameter name and the end of a parameter value +input: "{{foo|bar\n=baz\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar\n"), TemplateParamEquals(), Text(text="baz\n"), TemplateClose()] + +--- + +name: newline_named_param_name_start_end_param_value_end +label: a newline at the start and end of a parameter name and the end of a parameter value +input: "{{foo|\nbar\n=baz\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateParamEquals(), Text(text="baz\n"), TemplateClose()] + +--- + +name: newline_named_param_name_start_mid_param_value_end +label: a newline at the start and middle of a parameter name and the end of a parameter value +input: "{{foo|\nb\nar=baz\n}}" +output: [Text(text="{{foo|\nb\nar=baz\n}}")] + +--- + +name: newline_named_param_name_mid_end_param_value_end +label: a newline at the middle and end of a parameter name and the end of a parameter value +input: "{{foo|b\nar\n=baz\n}}" +output: [Text(text="{{foo|b\nar\n=baz\n}}")] + +--- + +name: newline_named_param_name_start_mid_end_param_value_end +label: a newline at the start, middle, and end of a parameter name and at the end of a parameter value +input: "{{foo|\nb\nar\n=baz\n}}" +output: [Text(text="{{foo|\nb\nar\n=baz\n}}")] + +--- + +name: newline_named_param_name_start_param_value_start +label: a newline at the start of a parameter name and at the start of a parameter value +input: "{{foo|\nbar=\nbaz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar"), TemplateParamEquals(), Text(text="\nbaz"), TemplateClose()] + +--- + +name: newline_named_param_name_end_param_value_start +label: a newline at the end of a parameter name and at the start of a parameter value +input: "{{foo|bar\n=\nbaz}}" +output: [TemplateOpen(), 
Text(text="foo"), TemplateParamSeparator(), Text(text="bar\n"), TemplateParamEquals(), Text(text="\nbaz"), TemplateClose()] + +--- + +name: newline_named_param_name_start_end_param_value_start +label: a newline at the start and end of a parameter name and at the start of a parameter value +input: "{{foo|\nbar\n=\nbaz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateParamEquals(), Text(text="\nbaz"), TemplateClose()] + +--- + +name: newline_named_param_name_start_mid_param_value_start +label: a newline at the start and middle of a parameter name and at the start of a parameter value +input: "{{foo|\nb\nar=\nbaz}}" +output: [Text(text="{{foo|\nb\nar=\nbaz}}")] + +--- + +name: newline_named_param_name_mid_end_param_value_start +label: a newline at the middle and end of a parameter name and at the start of a parameter value +input: "{{foo|b\nar\n=\nbaz}}" +output: [Text(text="{{foo|b\nar\n=\nbaz}}")] + +--- + +name: newline_named_param_name_start_mid_end_param_value_start +label: a newline at the start, middle, and end of a parameter name and at the start of a parameter value +input: "{{foo|\nb\nar\n=\nbaz}}" +output: [Text(text="{{foo|\nb\nar\n=\nbaz}}")] + +--- + +name: newline_named_param_name_start_param_value_start_end +label: a newline at the start of a parameter name and at the start and end of a parameter value +input: "{{foo|\nbar=\nbaz\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar"), TemplateParamEquals(), Text(text="\nbaz\n"), TemplateClose()] + +--- + +name: newline_named_param_name_end_param_value_start_end +label: a newline at the end of a parameter name and at the start and end of a parameter value +input: "{{foo|bar\n=\nbaz\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar\n"), TemplateParamEquals(), Text(text="\nbaz\n"), TemplateClose()] + +--- + +name: newline_named_param_name_start_end_param_value_start_end 
+label: a newline at the start and end of a parameter name and at the start and end of a parameter value +input: "{{foo|\nbar\n=\nbaz\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateParamEquals(), Text(text="\nbaz\n"), TemplateClose()] + +--- -"{{foo|1=\nbar}}" -"{{foo|1=bar\n}}" -"{{foo|1=\nbar\n}}" -"{{foo|1=\nb\nar}}" -"{{foo|1=b\nar\n}}" -"{{foo|1=\nb\nar\n}}" -"{{foo|\nbar=baz}}" -"{{foo|bar\n=baz}}" -"{{foo|\nbar\n=baz}}" -"{{foo|\nb\nar=baz}}" -"{{foo|b\nar\n=baz}}" -"{{foo|\nb\nar\n=baz}}" -"{{foo|\nbar=baz\n}}" -"{{foo|bar\n=baz\n}}" -"{{foo|\nbar\n=baz\n}}" -"{{foo|\nb\nar=baz\n}}" -"{{foo|b\nar\n=baz\n}}" -"{{foo|\nb\nar\n=baz\n}}" -"{{foo|\nbar=\nbaz}}" -"{{foo|bar\n=\nbaz}}" -"{{foo|\nbar\n=\nbaz}}" -"{{foo|\nb\nar=\nbaz}}" -"{{foo|b\nar\n=\nbaz}}" -"{{foo|\nb\nar\n=\nbaz}}" -"{{foo|\nbar=\nbaz\n}}" -"{{foo|bar\n=\nbaz\n}}" -"{{foo|\nbar\n=\nbaz\n}}" -"{{foo|\nb\nar=\nbaz\n}}" -"{{foo|b\nar\n=\nbaz\n}}" -"{{foo|\nb\nar\n=\nbaz\n}}" -"{{foo|\nbar=ba\nz}}" -"{{foo|bar\n=ba\nz}}" -"{{foo|\nbar\n=ba\nz}}" -"{{foo|\nb\nar=ba\nz}}" -"{{foo|b\nar\n=ba\nz}}" -"{{foo|\nb\nar\n=ba\nz}}" +name: newline_named_param_name_start_mid_param_value_start_end +label: a newline at the start and middle of a parameter name and at the start and end of a parameter value +input: "{{foo|\nb\nar=\nbaz\n}}" +output: [Text(text="{{foo|\nb\nar=\nbaz\n}}")] + +--- + +name: newline_named_param_name_mid_end_param_value_start_end +label: a newline at the middle and end of a parameter name and at the start and end of a parameter value +input: "{{foo|b\nar\n=\nbaz\n}}" +output: [Text(text="{{foo|b\nar\n=\nbaz\n}}")] + +--- + +name: newline_named_param_name_start_mid_end_param_value_start_end +label: a newline at the start, middle, and end of a parameter name and at the start and end of a parameter value +input: "{{foo|\nb\nar\n=\nbaz\n}}" +output: [Text(text="{{foo|\nb\nar\n=\nbaz\n}}")] + +--- + +name: 
newline_named_param_name_start_param_value_mid +label: a newline at the start of a parameter name and at the middle of a parameter value +input: "{{foo|\nbar=ba\nz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar"), TemplateParamEquals(), Text(text="ba\nz"), TemplateClose()] + +--- + +name: newline_named_param_name_end_param_value_mid +label: a newline at the end of a parameter name and at the middle of a parameter value +input: "{{foo|bar\n=ba\nz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar\n"), TemplateParamEquals(), Text(text="ba\nz"), TemplateClose()] + +--- + +name: newline_named_param_name_start_end_param_value_mid +label: a newline at the start and end of a parameter name and at the middle of a parameter value +input: "{{foo|\nbar\n=ba\nz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateParamEquals(), Text(text="ba\nz"), TemplateClose()] + +--- + +name: newline_named_param_name_start_mid_param_value_mid +label: a newline at the start and middle of a parameter name and at the middle of a parameter value +input: "{{foo|\nb\nar=ba\nz}}" +output: [Text(text="{{foo|\nb\nar=ba\nz}}")] + +--- + +name: newline_named_param_name_mid_end_param_value_mid +label: a newline at the middle and end of a parameter name and at the middle of a parameter value +input: "{{foo|b\nar\n=ba\nz}}" +output: [Text(text="{{foo|b\nar\n=ba\nz}}")] + +--- + +name: newline_named_param_name_start_mid_end_param_value_mid +label: a newline at the start, middle, and end of a parameter name and at the middle of a parameter value +input: "{{foo|\nb\nar\n=ba\nz}}" +output: [Text(text="{{foo|\nb\nar\n=ba\nz}}")] + +--- + +name: incomplete_tests "{{\nfoo\n|\nbar\n=\nb\naz\n|\nb\nuz\n}}" "{{\nfoo\n|\nb\nar\n|\nbaz\n=\nb\nuz\n}}" From 24c55aeeb183f4b7643e521e3125a8610a74674e Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 17 Feb 2013 21:52:08 -0500 Subject: [PATCH 
017/115] Adding a syntax highlighter for the test-case format. --- tests/MWPFHTestCase.tmlanguage | 130 +++++++++++++++++++++ tests/_test_tokenizer.py | 2 +- .../tokenizer/{templates.test => templates.mwtest} | 0 tests/tokenizer/{text.test => text.mwtest} | 0 4 files changed, 131 insertions(+), 1 deletion(-) create mode 100644 tests/MWPFHTestCase.tmlanguage rename tests/tokenizer/{templates.test => templates.mwtest} (100%) rename tests/tokenizer/{text.test => text.mwtest} (100%) diff --git a/tests/MWPFHTestCase.tmlanguage b/tests/MWPFHTestCase.tmlanguage new file mode 100644 index 0000000..e6ea7f0 --- /dev/null +++ b/tests/MWPFHTestCase.tmlanguage @@ -0,0 +1,130 @@ + + + + + fileTypes + + mwtest + + name + MWParserFromHell Test Case + patterns + + + match + --- + name + markup.heading.divider.mwpfh + + + captures + + 1 + + name + keyword.other.name.mwpfh + + 2 + + name + variable.other.name.mwpfh + + + match + (name:)\s*(\w*) + name + meta.name.mwpfh + + + captures + + 1 + + name + keyword.other.label.mwpfh + + 2 + + name + comment.line.other.label.mwpfh + + + match + (label:)\s*(.*) + name + meta.label.mwpfh + + + captures + + 1 + + name + keyword.other.input.mwpfh + + 2 + + name + string.quoted.double.input.mwpfh + + + match + (input:)\s*(.*) + name + meta.input.mwpfh + + + captures + + 1 + + name + keyword.other.output.mwpfh + + + match + (output:) + name + meta.output.mwpfh + + + captures + + 1 + + name + support.language.token.mwpfh + + + match + (\w+)\s*\( + name + meta.name.token.mwpfh + + + captures + + 1 + + name + variable.parameter.token.mwpfh + + + match + (\w+)\s*(=) + name + meta.name.parameter.token.mwpfh + + + match + ".*?" 
+ name + string.quoted.double.mwpfh + + + scopeName + text.mwpfh + uuid + cd3e2ffa-a57d-4c40-954f-1a2e87ffd638 + + diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 114b835..4d12dc9 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -107,7 +107,7 @@ class TokenizerTestCase(object): def build(cls): """Load and install all tests from the 'tokenizer' directory.""" directory = path.join(path.dirname(__file__), "tokenizer") - extension = ".test" + extension = ".mwtest" for filename in listdir(directory): if not filename.endswith(extension): continue diff --git a/tests/tokenizer/templates.test b/tests/tokenizer/templates.mwtest similarity index 100% rename from tests/tokenizer/templates.test rename to tests/tokenizer/templates.mwtest diff --git a/tests/tokenizer/text.test b/tests/tokenizer/text.mwtest similarity index 100% rename from tests/tokenizer/text.test rename to tests/tokenizer/text.mwtest From 31a977bdfe2d12487417d4ef1c343fc12209b148 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 17 Feb 2013 22:39:53 -0500 Subject: [PATCH 018/115] Finish all incomplete template tests. 
--- tests/tokenizer/templates.mwtest | 236 +++++++++++++++++++++++++++++++++------ 1 file changed, 200 insertions(+), 36 deletions(-) diff --git a/tests/tokenizer/templates.mwtest b/tests/tokenizer/templates.mwtest index c3416ff..d699ef2 100644 --- a/tests/tokenizer/templates.mwtest +++ b/tests/tokenizer/templates.mwtest @@ -719,39 +719,203 @@ output: [Text(text="{{foo|\nb\nar\n=ba\nz}}")] --- -name: incomplete_tests - -"{{\nfoo\n|\nbar\n=\nb\naz\n|\nb\nuz\n}}" -"{{\nfoo\n|\nb\nar\n|\nbaz\n=\nb\nuz\n}}" - -"{{\nfoo\n|\n{{\nbar\n|\nbaz\n=\nb\niz\n}}\n=\nb\nuzz\n}}" - -"{{foo{bar}}" -"{{foo}bar}}" -"{{{foobar}}" -"{{foo{b{ar}}" -"{{foo[bar}}" -"{{foo]bar}}" -"{{[foobar}}" -"{{foobar]}}" - -"{{foo|ba{r}}" -"{{foo|ba{r}}}" -"{{foo|ba{r}=baz}}" -"{{foo|ba[r]}}" - -"{{foobar" -"{{foobar}" -"{{foobar|" -"{{foo|bar" -"{{foo|bar|" -"{{foo|bar=" -"{{foo|bar=|" -"{{foo|bar=baz" -"{{foo|bar=baz|" -"{{foo|bar|baz" -"{{foo|bar|baz=" -"{{foo|bar|baz=biz" -"{{foo|bar=baz|biz" -"{{foo|bar=baz|biz=" -"{{foo|bar=baz|biz=buzz" +name: newline_wildcard +label: a random, complex assortment of templates and newlines +input: "{{\nfoo\n|\nbar\n=\nb\naz\n|\nb\nuz\n}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateParamEquals(), Text(text="\nb\naz\n"), TemplateParamSeparator(), Text(text="\nb\nuz\n"), TemplateClose()] + +--- + +name: newline_wildcard_redux +label: an even more random and complex assortment of templates and newlines +input: "{{\nfoo\n|\n{{\nbar\n|\nbaz\n=\nb\niz\n}}\n=\nb\nuzz\n}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\n"), TemplateOpen(), Text(text="\nbar\n"), TemplateParamSeparator(), Text(text="\nbaz\n"), TemplateParamEquals(), Text(text="\nb\niz\n"), TemplateClose(), Text(text="\n"), TemplateParamEquals(), Text(text="\nb\nuzz\n"), TemplateClose()] + +--- + +name: invalid_name_left_brace_middle +label: invalid characters in template name: left brace in middle +input: 
"{{foo{bar}}" +output: [Text(text="{{foo{bar}}")] + +--- + +name: invalid_name_right_brace_middle +label: invalid characters in template name: right brace in middle +input: "{{foo}bar}}" +output: [Text(text="{{foo}bar}}")] + +--- + +name: invalid_name_left_braces +label: invalid characters in template name: two left braces in middle +input: "{{foo{b{ar}}" +output: [Text(text="{{foo{b{ar}}")] + +--- + +name: invalid_name_left_bracket_middle +label: invalid characters in template name: left bracket in middle +input: "{{foo[bar}}" +output: [Text(text="{{foo[bar}}")] + +--- + +name: invalid_name_right_bracket_middle +label: invalid characters in template name: right bracket in middle +input: "{{foo]bar}}" +output: [Text(text="{{foo]bar}}")] + +--- + +name: invalid_name_left_bracket_start +label: invalid characters in template name: left bracket at start +input: "{{[foobar}}" +output: [Text(text="{{[foobar}}")] + +--- + +name: invalid_name_right_bracket_start +label: invalid characters in template name: right bracket at end +input: "{{foobar]}}" +output: [Text(text="{{foobar]}}")] + +--- + +name: valid_name_left_brace_start +label: valid characters in template name: left brace at start +input: "{{{foobar}}" +output: [Text(text="{"), TemplateOpen(), Text(text="foobar"), TemplateClose()] + +--- + +name: valid_unnamed_param_left_brace +label: valid characters in unnamed template parameter: left brace +input: "{{foo|ba{r}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba{r"), TemplateClose()] + +--- + +name: valid_unnamed_param_braces +label: valid characters in unnamed template parameter: left and right braces +input: "{{foo|ba{r}}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba{r"), TemplateClose(), Text(text="}")] + +--- + +name: valid_param_name_braces +label: valid characters in template parameter name: left and right braces +input: "{{foo|ba{r}=baz}}" +output: [TemplateOpen(), Text(text="foo"), 
TemplateParamSeparator(), Text(text="ba{r}"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] + +--- + +name: valid_unnamed_param_brackets +label: valid characters in unnamed template parameter: left and right brackets +input: "{{foo|ba[r]}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba[r]"), TemplateClose()] + +--- + +name: incomplete_plain +label: incomplete templates that should fail gracefully: no close whatsoever +input: "{{stuff}} {{foobar" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foobar")] + +--- + +name: incomplete_right_brace +label: incomplete templates that should fail gracefully: only one right brace +input: "{{stuff}} {{foobar}" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foobar}")] + +--- + +name: incomplete_pipe +label: incomplete templates that should fail gracefully: a pipe +input: "{{stuff}} {{foobar|" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foobar|")] + +--- + +name: incomplete_unnamed_param +label: incomplete templates that should fail gracefully: an unnamed parameter +input: "{{stuff}} {{foo|bar" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar")] + +--- + +name: incomplete_unnamed_param_pipe +label: incomplete templates that should fail gracefully: an unnamed parameter, then a pipe +input: "{{stuff}} {{foo|bar|" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar|")] + +--- + +name: incomplete_valueless_param +label: incomplete templates that should fail gracefully: a named parameter with no value +input: "{{stuff}} {{foo|bar=" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=")] + +--- + +name: incomplete_valueless_param_pipe +label: incomplete templates that should fail gracefully: a named parameter with no value, then a pipe +input: "{{stuff}} {{foo|bar=|" +output: [TemplateOpen(), 
Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=|")] + +--- + +name: incomplete_named_param +label: incomplete templates that should fail gracefully: a named parameter with a value +input: "{{stuff}} {{foo|bar=baz" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz")] + +--- + +name: incomplete_named_param_pipe +label: incomplete templates that should fail gracefully: a named parameter with a value, then a pipe +input: "{{stuff}} {{foo|bar=baz|" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz|")] + +--- + +name: incomplete_two_unnamed_params +label: incomplete templates that should fail gracefully: two unnamed parameters +input: "{{stuff}} {{foo|bar|baz" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar|baz")] + +--- + +name: incomplete_unnamed_param_valueless_param +label: incomplete templates that should fail gracefully: an unnamed parameter, then a named parameter with no value +input: "{{stuff}} {{foo|bar|baz=" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar|baz=")] + +--- + +name: incomplete_unnamed_param_named_param +label: incomplete templates that should fail gracefully: an unnamed parameter, then a named parameter with a value +input: "{{stuff}} {{foo|bar|baz=biz" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar|baz=biz")] + +--- + +name: incomplete_named_param_unnamed_param +label: incomplete templates that should fail gracefully: a named parameter with a value, then an unnamed parameter +input: "{{stuff}} {{foo|bar=baz|biz" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz|biz")] + +--- + +name: incomplete_named_param_valueless_param +label: incomplete templates that should fail gracefully: a named parameter with a value, then a named parameter with no value +input: "{{stuff}} {{foo|bar=baz|biz=" +output: 
[TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz|biz=")] + +--- + +name: incomplete_two_named_params +label: incomplete templates that should fail gracefully: two named parameters with values +input: "{{stuff}} {{foo|bar=baz|biz=buzz" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz|biz=buzz")] From acb7e579045e4ab74fbc025894d49cad72241b51 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 23 Feb 2013 12:14:06 -0500 Subject: [PATCH 019/115] Make mwparserfromhell.parser() be an alias for parse_anything(). Some other changes, including removal of the 'string' import in the tokenizer. --- mwparserfromhell/__init__.py | 5 ++--- mwparserfromhell/parser/__init__.py | 5 +---- mwparserfromhell/parser/tokenizer.py | 5 ++--- mwparserfromhell/utils.py | 14 +++++++------- 4 files changed, 12 insertions(+), 17 deletions(-) diff --git a/mwparserfromhell/__init__.py b/mwparserfromhell/__init__.py index e18000b..99bc0c2 100644 --- a/mwparserfromhell/__init__.py +++ b/mwparserfromhell/__init__.py @@ -34,7 +34,6 @@ __license__ = "MIT License" __version__ = "0.2.dev" __email__ = "ben.kurtovic@verizon.net" -from . import nodes, parser, smart_list, string_mixin, wikicode +from . import compat, nodes, parser, smart_list, string_mixin, utils, wikicode -parse = lambda text: parser.Parser(text).parse() -parse.__doc__ = "Short for :py:meth:`.Parser.parse`." +parse = utils.parse_anything diff --git a/mwparserfromhell/parser/__init__.py b/mwparserfromhell/parser/__init__.py index 074b9ba..3f034f6 100644 --- a/mwparserfromhell/parser/__init__.py +++ b/mwparserfromhell/parser/__init__.py @@ -26,10 +26,7 @@ modules: the :py:mod:`~.tokenizer` and the :py:mod:`~.builder`. This module joins them together under one interface. 
""" -try: - from ._builder import CBuilder as Builder -except ImportError: - from .builder import Builder +from .builder import Builder try: from ._tokenizer import CTokenizer as Tokenizer except ImportError: diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index eead131..c02e353 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -23,7 +23,6 @@ from __future__ import unicode_literals from math import log import re -import string from . import contexts from . import tokens @@ -377,9 +376,9 @@ class Tokenizer(object): else: numeric = hexadecimal = False - valid = string.hexdigits if hexadecimal else string.digits + valid = "0123456789abcdefABCDEF" if hexadecimal else "0123456789" if not numeric and not hexadecimal: - valid += string.ascii_letters + valid += "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" if not all([char in valid for char in this]): self._fail_route() diff --git a/mwparserfromhell/utils.py b/mwparserfromhell/utils.py index 83264e2..b797419 100644 --- a/mwparserfromhell/utils.py +++ b/mwparserfromhell/utils.py @@ -34,16 +34,16 @@ from .smart_list import SmartList def parse_anything(value): """Return a :py:class:`~.Wikicode` for *value*, allowing multiple types. - This differs from :py:func:`mwparserfromhell.parse` in that we accept more - than just a string to be parsed. Unicode objects (strings in py3k), strings - (bytes in py3k), integers (converted to strings), ``None``, existing + This differs from :py:meth:`.Parser.parse` in that we accept more than just + a string to be parsed. Unicode objects (strings in py3k), strings (bytes in + py3k), integers (converted to strings), ``None``, existing :py:class:`~.Node` or :py:class:`~.Wikicode` objects, as well as an iterable of these types, are supported. 
This is used to parse input on-the-fly by various methods of :py:class:`~.Wikicode` and others like :py:class:`~.Template`, such as :py:meth:`wikicode.insert() <.Wikicode.insert>` or setting :py:meth:`template.name <.Template.name>`. """ - from . import parse + from .parser import Parser from .wikicode import Wikicode if isinstance(value, Wikicode): @@ -51,11 +51,11 @@ def parse_anything(value): elif isinstance(value, Node): return Wikicode(SmartList([value])) elif isinstance(value, str): - return parse(value) + return Parser(value).parse() elif isinstance(value, bytes): - return parse(value.decode("utf8")) + return Parser(value.decode("utf8")).parse() elif isinstance(value, int): - return parse(str(value)) + return Parser(str(value)).parse() elif value is None: return Wikicode(SmartList()) try: From 0803417901d09d7df830e65300355507715e67cb Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 23 Feb 2013 13:12:16 -0500 Subject: [PATCH 020/115] Port CTokenizer's verify_safe method to Python to solve a failing test. 
--- mwparserfromhell/parser/contexts.py | 62 +++++++++++------- mwparserfromhell/parser/tokenizer.c | 12 ++-- mwparserfromhell/parser/tokenizer.h | 1 + mwparserfromhell/parser/tokenizer.py | 122 +++++++++++++++++++++++++---------- 4 files changed, 137 insertions(+), 60 deletions(-) diff --git a/mwparserfromhell/parser/contexts.py b/mwparserfromhell/parser/contexts.py index b65946c..896d137 100644 --- a/mwparserfromhell/parser/contexts.py +++ b/mwparserfromhell/parser/contexts.py @@ -62,6 +62,15 @@ Local (stack-specific) contexts: * :py:const:`COMMENT` +* :py:const:`SAFETY_CHECK` + + * :py:const:`HAS_TEXT` + * :py:const:`FAIL_ON_TEXT` + * :py:const:`FAIL_NEXT` + * :py:const:`FAIL_ON_LBRACE` + * :py:const:`FAIL_ON_RBRACE` + * :py:const:`FAIL_ON_EQUALS` + Global contexts: * :py:const:`GL_HEADING` @@ -69,29 +78,36 @@ Global contexts: # Local contexts: -TEMPLATE = 0b00000000000111 -TEMPLATE_NAME = 0b00000000000001 -TEMPLATE_PARAM_KEY = 0b00000000000010 -TEMPLATE_PARAM_VALUE = 0b00000000000100 - -ARGUMENT = 0b00000000011000 -ARGUMENT_NAME = 0b00000000001000 -ARGUMENT_DEFAULT = 0b00000000010000 - -WIKILINK = 0b00000001100000 -WIKILINK_TITLE = 0b00000000100000 -WIKILINK_TEXT = 0b00000001000000 - -HEADING = 0b01111110000000 -HEADING_LEVEL_1 = 0b00000010000000 -HEADING_LEVEL_2 = 0b00000100000000 -HEADING_LEVEL_3 = 0b00001000000000 -HEADING_LEVEL_4 = 0b00010000000000 -HEADING_LEVEL_5 = 0b00100000000000 -HEADING_LEVEL_6 = 0b01000000000000 - -COMMENT = 0b10000000000000 - +TEMPLATE = 0b00000000000000000111 +TEMPLATE_NAME = 0b00000000000000000001 +TEMPLATE_PARAM_KEY = 0b00000000000000000010 +TEMPLATE_PARAM_VALUE = 0b00000000000000000100 + +ARGUMENT = 0b00000000000000011000 +ARGUMENT_NAME = 0b00000000000000001000 +ARGUMENT_DEFAULT = 0b00000000000000010000 + +WIKILINK = 0b00000000000001100000 +WIKILINK_TITLE = 0b00000000000000100000 +WIKILINK_TEXT = 0b00000000000001000000 + +HEADING = 0b00000001111110000000 +HEADING_LEVEL_1 = 0b00000000000010000000 +HEADING_LEVEL_2 = 
0b00000000000100000000 +HEADING_LEVEL_3 = 0b00000000001000000000 +HEADING_LEVEL_4 = 0b00000000010000000000 +HEADING_LEVEL_5 = 0b00000000100000000000 +HEADING_LEVEL_6 = 0b00000001000000000000 + +COMMENT = 0b00000010000000000000 + +SAFETY_CHECK = 0b11111100000000000000 +HAS_TEXT = 0b00000100000000000000 +FAIL_ON_TEXT = 0b00001000000000000000 +FAIL_NEXT = 0b00010000000000000000 +FAIL_ON_LBRACE = 0b00100000000000000000 +FAIL_ON_RBRACE = 0b01000000000000000000 +FAIL_ON_EQUALS = 0b10000000000000000000 # Global contexts: diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 09649a7..d82b080 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -1324,10 +1324,14 @@ Tokenizer_parse(Tokenizer* self, int context) Tokenizer_write_text(self, this); } else if (this == next && next == *"[") { - if (Tokenizer_parse_wikilink(self)) - return NULL; - if (self->topstack->context & LC_FAIL_NEXT) - self->topstack->context ^= LC_FAIL_NEXT; + if (!(this_context & LC_WIKILINK_TITLE)) { + if (Tokenizer_parse_wikilink(self)) + return NULL; + if (self->topstack->context & LC_FAIL_NEXT) + self->topstack->context ^= LC_FAIL_NEXT; + } + else + Tokenizer_write_text(self, this); } else if (this == *"|" && this_context & LC_WIKILINK_TITLE) { if (Tokenizer_handle_wikilink_separator(self)) diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index 3293a8f..af86321 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -118,6 +118,7 @@ static PyObject* TagCloseClose; #define LC_COMMENT 0x02000 +#define LC_SAFETY_CHECK 0xFC000 #define LC_HAS_TEXT 0x04000 #define LC_FAIL_ON_TEXT 0x08000 #define LC_FAIL_NEXT 0x10000 diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index eead131..a365db8 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -213,28 +213,9 @@ class 
Tokenizer(object): self._write_all(argument) self._write(tokens.ArgumentClose()) - def _verify_safe(self, unsafes, strip=True): - """Verify that there are no unsafe characters in the current stack. - - The route will be failed if the name contains any element of *unsafes* - in it. This is used when parsing template names, parameter keys, and so - on, which cannot contain newlines and some other characters. If *strip* - is ``True``, the text will be stripped of whitespace, since this is - allowed at the ends of certain elements but not between text. - """ - self._push_textbuffer() - if self._stack: - text = [tok for tok in self._stack if isinstance(tok, tokens.Text)] - text = "".join([token.text for token in text]) - if strip: - text = text.strip() - if text and any([unsafe in text for unsafe in unsafes]): - self._fail_route() - def _handle_template_param(self): """Handle a template parameter at the head of the string.""" if self._context & contexts.TEMPLATE_NAME: - self._verify_safe(["\n", "{", "}", "[", "]"]) self._context ^= contexts.TEMPLATE_NAME elif self._context & contexts.TEMPLATE_PARAM_VALUE: self._context ^= contexts.TEMPLATE_PARAM_VALUE @@ -246,11 +227,6 @@ class Tokenizer(object): def _handle_template_param_value(self): """Handle a template parameter's value at the head of the string.""" - try: - self._verify_safe(["\n", "{{", "}}"]) - except BadRoute: - self._pop() - raise self._write_all(self._pop(keep_context=True)) self._context ^= contexts.TEMPLATE_PARAM_KEY self._context |= contexts.TEMPLATE_PARAM_VALUE @@ -258,24 +234,19 @@ class Tokenizer(object): def _handle_template_end(self): """Handle the end of a template at the head of the string.""" - if self._context & contexts.TEMPLATE_NAME: - self._verify_safe(["\n", "{", "}", "[", "]"]) - elif self._context & contexts.TEMPLATE_PARAM_KEY: + if self._context & contexts.TEMPLATE_PARAM_KEY: self._write_all(self._pop(keep_context=True)) self._head += 1 return self._pop() def 
_handle_argument_separator(self): """Handle the separator between an argument's name and default.""" - self._verify_safe(["\n", "{{", "}}"]) self._context ^= contexts.ARGUMENT_NAME self._context |= contexts.ARGUMENT_DEFAULT self._write(tokens.ArgumentSeparator()) def _handle_argument_end(self): """Handle the end of an argument at the head of the string.""" - if self._context & contexts.ARGUMENT_NAME: - self._verify_safe(["\n", "{{", "}}"]) self._head += 2 return self._pop() @@ -295,15 +266,12 @@ class Tokenizer(object): def _handle_wikilink_separator(self): """Handle the separator between a wikilink's title and its text.""" - self._verify_safe(["\n", "{", "}", "[", "]"], strip=False) self._context ^= contexts.WIKILINK_TITLE self._context |= contexts.WIKILINK_TEXT self._write(tokens.WikilinkSeparator()) def _handle_wikilink_end(self): """Handle the end of a wikilink at the head of the string.""" - if self._context & contexts.WIKILINK_TITLE: - self._verify_safe(["\n", "{", "}", "[", "]"], strip=False) self._head += 1 return self._pop() @@ -424,11 +392,94 @@ class Tokenizer(object): self._write(tokens.CommentEnd()) self._head += 2 + def _verify_safe(self, this): + """Make sure we are not trying to write an invalid character.""" + context = self._context + if context & contexts.FAIL_NEXT: + self._fail_route() + if context & contexts.WIKILINK_TITLE: + if this == "]" or this == "{": + self._context |= contexts.FAIL_NEXT + elif this == "\n" or this == "[" or this == "}": + self._fail_route() + return + if context & contexts.TEMPLATE_NAME: + if this == "{" or this == "}" or this == "[": + self._context |= contexts.FAIL_NEXT + return + if this == "]": + self._fail_route() + return + if this == "|": + return + elif context & (contexts.TEMPLATE_PARAM_KEY | contexts.ARGUMENT_NAME): + if context & contexts.FAIL_ON_EQUALS: + if this == "=": + self._fail_route() + return + elif context & contexts.FAIL_ON_LBRACE: + if this == "{": + if context & contexts.TEMPLATE: + self._context 
|= contexts.FAIL_ON_EQUALS + else: + self._context |= contexts.FAIL_NEXT + return + self._context ^= contexts.FAIL_ON_LBRACE + elif context & contexts.FAIL_ON_RBRACE: + if this == "}": + if context & contexts.TEMPLATE: + self._context |= contexts.FAIL_ON_EQUALS + else: + self._context |= contexts.FAIL_NEXT + return + self._context ^= contexts.FAIL_ON_RBRACE + elif this == "{": + self._context |= contexts.FAIL_ON_LBRACE + elif this == "}": + self._context |= contexts.FAIL_ON_RBRACE + if context & contexts.HAS_TEXT: + if context & contexts.FAIL_ON_TEXT: + if this is self.END or not this.isspace(): + if context & contexts.TEMPLATE_PARAM_KEY: + self._context ^= contexts.FAIL_ON_TEXT + self._context |= contexts.FAIL_ON_EQUALS + else: + self._fail_route() + return + else: + if this == "\n": + self._context |= contexts.FAIL_ON_TEXT + elif this is self.END or not this.isspace(): + self._context |= contexts.HAS_TEXT + + def _reset_safety_checks(self): + """Unset any safety-checking contexts set by Tokenizer_verify_safe(). + + Used when we preserve a context but previous data becomes invalid, like + when moving between template parameters. 
+ """ + context = self._context + checks = (contexts.HAS_TEXT, contexts.FAIL_ON_TEXT, contexts.FAIL_NEXT, + contexts.FAIL_ON_LBRACE, contexts.FAIL_ON_RBRACE, + contexts.FAIL_ON_EQUALS) + for check in checks: + if context & check: + self._context ^= check; + def _parse(self, context=0): """Parse the wikicode string, using *context* for when to stop.""" self._push(context) while True: this = self._read() + unsafe = (contexts.TEMPLATE_NAME | contexts.WIKILINK_TITLE | + contexts.TEMPLATE_PARAM_KEY | contexts.ARGUMENT_NAME) + if self._context & unsafe: + try: + self._verify_safe(this) + except BadRoute: + if self._context & contexts.TEMPLATE_PARAM_KEY: + self._pop() + raise if this not in self.MARKERS: self._write_text(this) self._head += 1 @@ -450,7 +501,10 @@ class Tokenizer(object): self._write_text(this) elif this == next == "{": self._parse_template_or_argument() + if self._context & contexts.FAIL_NEXT: + self._context ^= contexts.FAIL_NEXT elif this == "|" and self._context & contexts.TEMPLATE: + self._reset_safety_checks() self._handle_template_param() elif this == "=" and self._context & contexts.TEMPLATE_PARAM_KEY: self._handle_template_param_value() @@ -466,6 +520,8 @@ class Tokenizer(object): elif this == next == "[": if not self._context & contexts.WIKILINK_TITLE: self._parse_wikilink() + if self._context & contexts.FAIL_NEXT: + self._context ^= contexts.FAIL_NEXT else: self._write_text("[") elif this == "|" and self._context & contexts.WIKILINK_TITLE: From 111a71f0c242b6827b2f5a02731f2e198ba7b70e Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 25 Feb 2013 00:18:03 -0500 Subject: [PATCH 021/115] Committing an empty file to work on later. 
--- tests/test_string_mixin.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 tests/test_string_mixin.py diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py new file mode 100644 index 0000000..b9413ec --- /dev/null +++ b/tests/test_string_mixin.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import unicode_literals + +import mwparserfromhell + +class TestStringMixIn(unittest.TestCase): + """Test cases for the StringMixIn class.""" + def test_(self): + pass + +if __name__ == "__main__": + unittest.main(verbosity=2) From 221af8a9d7100d69d03e1af8ad6b4e020e2cceb4 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Tue, 26 Feb 2013 10:55:49 -0500 Subject: [PATCH 022/115] Adding some tests to TestStringMixIn --- mwparserfromhell/string_mixin.py | 1 - tests/test_string_mixin.py | 80 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 78 insertions(+), 3 deletions(-) diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index d7a0749..ac47251 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -50,7 +50,6 @@ class StringMixIn(object): :py:meth:`__unicode__` instead of the immutable ``self`` like the regular ``str`` type. """ - if py3k: def __str__(self): return self.__unicode__() diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index b9413ec..0d2ca43 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -21,12 +21,88 @@ # SOFTWARE. 
from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import py3k, str +from mwparserfromhell.string_mixin import StringMixIn + +class _FakeString(StringMixIn): + def __init__(self, data): + self._data = data + + def __unicode__(self): + return self._data -import mwparserfromhell class TestStringMixIn(unittest.TestCase): """Test cases for the StringMixIn class.""" - def test_(self): + def test_docs(self): + """make sure the various functions of StringMixIn have docstrings""" + methods = [ + "capitalize", "center", "count", "encode", "endswith", + "expandtabs", "find", "format", "index", "isalnum", "isalpha", + "isdecimal", "isdigit", "islower", "isnumeric", "isspace", + "istitle", "isupper", "join", "ljust", "lstrip", "partition", + "replace", "rfind", "rindex", "rjust", "rpartition", "rsplit", + "rstrip", "split", "splitlines", "startswith", "strip", "swapcase", + "title", "translate", "upper", "zfill"] + if not py3k: + methods.append("decode") + for meth in methods: + expected = getattr(str, meth).__doc__ + actual = getattr(StringMixIn, meth).__doc__ + self.assertEquals(expected, actual) + + def test_types(self): + """make sure StringMixIns convert to different types correctly""" + pass + + def test_comparisons(self): + """make sure comparison operators work""" + str1 = _FakeString("this is a fake string") + str2 = _FakeString("this is a fake string") + str3 = _FakeString("fake string, this is") + str4 = "this is a fake string" + str5 = "fake string, this is" + + self.assertFalse(str1 > str2) + self.assertTrue(str1 >= str2) + self.assertTrue(str1 == str2) + self.assertFalse(str1 != str2) + self.assertFalse(str1 < str2) + self.assertTrue(str1 <= str2) + + self.assertTrue(str1 > str3) + self.assertTrue(str1 >= str3) + self.assertFalse(str1 == str3) + self.assertTrue(str1 != str3) + self.assertFalse(str1 < str3) + self.assertFalse(str1 <= str3) + + self.assertFalse(str1 > str4) + self.assertTrue(str1 >= str4) + self.assertTrue(str1 
== str4) + self.assertFalse(str1 != str4) + self.assertFalse(str1 < str4) + self.assertTrue(str1 <= str4) + + self.assertTrue(str1 > str5) + self.assertTrue(str1 >= str5) + self.assertFalse(str1 == str5) + self.assertTrue(str1 != str5) + self.assertFalse(str1 < str5) + self.assertFalse(str1 <= str5) + + def test_operators(self): + """make sure string addition and multiplication work""" + pass + + def test_other_magics(self): + """test other magically implemented features, like len() and iter()""" + pass + + def test_other_methods(self): + """test the remaining non-magic methods of StringMixIn""" pass if __name__ == "__main__": From 6e748004d1fa16ec812a527644f2f24515d0ff00 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Feb 2013 10:41:21 -0500 Subject: [PATCH 023/115] test_types(), test_other_magics(); add range to compat --- mwparserfromhell/compat.py | 2 ++ tests/test_string_mixin.py | 68 +++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 63 insertions(+), 7 deletions(-) diff --git a/mwparserfromhell/compat.py b/mwparserfromhell/compat.py index 576c2c5..48b9807 100755 --- a/mwparserfromhell/compat.py +++ b/mwparserfromhell/compat.py @@ -16,6 +16,7 @@ if py3k: bytes = bytes str = str basestring = str + range = range maxsize = sys.maxsize import html.entities as htmlentities from io import StringIO @@ -24,6 +25,7 @@ else: bytes = str str = unicode basestring = basestring + range = xrange maxsize = sys.maxint import htmlentitydefs as htmlentities from StringIO import StringIO diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 0d2ca43..0e60309 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -21,9 +21,10 @@ # SOFTWARE. 
from __future__ import unicode_literals +from types import GeneratorType import unittest -from mwparserfromhell.compat import py3k, str +from mwparserfromhell.compat import bytes, py3k, range, str from mwparserfromhell.string_mixin import StringMixIn class _FakeString(StringMixIn): @@ -55,7 +56,20 @@ class TestStringMixIn(unittest.TestCase): def test_types(self): """make sure StringMixIns convert to different types correctly""" - pass + fstr = _FakeString("fake string") + self.assertEquals(str(fstr), "fake string") + self.assertEquals(bytes(fstr), b"fake string") + if py3k: + self.assertEquals(repr(fstr), "'fake string'") + else: + self.assertEquals(repr(fstr), b"u'fake string'") + + self.assertIsInstance(str(fstr), str) + self.assertIsInstance(bytes(fstr), bytes) + if py3k: + self.assertIsInstance(repr(fstr), str) + else: + self.assertIsInstance(repr(fstr), bytes) def test_comparisons(self): """make sure comparison operators work""" @@ -93,13 +107,53 @@ class TestStringMixIn(unittest.TestCase): self.assertFalse(str1 < str5) self.assertFalse(str1 <= str5) - def test_operators(self): - """make sure string addition and multiplication work""" - pass - def test_other_magics(self): """test other magically implemented features, like len() and iter()""" - pass + str1 = _FakeString("fake string") + str2 = _FakeString("") + expected = ["f", "a", "k", "e", " ", "s", "t", "r", "i", "n", "g"] + + self.assertTrue(str1) + self.assertFalse(str2) + self.assertEquals(11, len(str1)) + self.assertEquals(0, len(str2)) + + out = [] + for ch in str1: + out.append(ch) + self.assertEquals(expected, out) + + out = [] + for ch in str2: + out.append(ch) + self.assertEquals([], out) + + gen1 = iter(str1) + gen2 = iter(str2) + self.assertIsInstance(gen1, GeneratorType) + self.assertIsInstance(gen2, GeneratorType) + + out = [] + for i in range(len(str1)): + out.append(gen1.next()) + self.assertRaises(StopIteration, gen1.next) + self.assertEquals(expected, out) + self.assertRaises(StopIteration, 
gen2.next) + + self.assertEquals("f", str1[0]) + self.assertEquals(" ", str1[4]) + self.assertEquals("g", str1[10]) + self.assertEquals("n", str1[-2]) + self.assertRaises(IndexError, lambda: str1[11]) + self.assertRaises(IndexError, lambda: str2[0]) + + self.assertTrue("k" in str1) + self.assertTrue("fake" in str1) + self.assertTrue("str" in str1) + self.assertTrue("" in str1) + self.assertTrue("" in str2) + self.assertFalse("real" in str1) + self.assertFalse("s" in str2) def test_other_methods(self): """test the remaining non-magic methods of StringMixIn""" From e2fe0120ea128ac6df646e09b25468507e3f2aec Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Feb 2013 10:56:57 -0500 Subject: [PATCH 024/115] Some tests for test_other_methods() --- tests/test_string_mixin.py | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 0e60309..74da9ff 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -157,7 +157,43 @@ class TestStringMixIn(unittest.TestCase): def test_other_methods(self): """test the remaining non-magic methods of StringMixIn""" - pass + fstr = _FakeString("fake string") + + self.assertEquals("Fake string", fstr.capitalize()) + + self.assertEquals(" fake string ", fstr.center(15)) + self.assertEquals(" fake string ", fstr.center(16)) + self.assertEquals("qqfake stringqq", fstr.center(15, "q")) + + self.assertEquals(1, fstr.count("e")) + self.assertEquals(0, fstr.count("z")) + self.assertEquals(1, fstr.count("r", 7)) + self.assertEquals(0, fstr.count("r", 8)) + self.assertEquals(1, fstr.count("r", 5, 9)) + self.assertEquals(0, fstr.count("r", 5, 7)) + + if not py3k: + self.assertEquals(fstr, fstr.decode()) + self.assertEquals("𐌲𐌿𐍄", '\\U00010332\\U0001033f\\U00010344'.decode("unicode_escape")) + + self.assertEquals(b"fake string", fstr.encode()) + self.assertEquals(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", + 
"𐌲𐌿𐍄".encode("utf8")) + self.assertRaises(UnicodeEncodeError, "𐌲𐌿𐍄".encode) + self.assertRaises(UnicodeEncodeError, "𐌲𐌿𐍄".encode, "ascii") + self.assertRaises(UnicodeEncodeError, "𐌲𐌿𐍄".encode, "ascii", "strict") + self.assertEquals("", "𐌲𐌿𐍄".encode("ascii", "ignore")) + + self.assertTrue(fstr.endswith("ing")) + self.assertFalse(fstr.endswith("ingh")) + + methods = [ + "expandtabs", "find", "format", "index", "isalnum", "isalpha", + "isdecimal", "isdigit", "islower", "isnumeric", "isspace", + "istitle", "isupper", "join", "ljust", "lstrip", "partition", + "replace", "rfind", "rindex", "rjust", "rpartition", "rsplit", + "rstrip", "split", "splitlines", "startswith", "strip", "swapcase", + "title", "translate", "upper", "zfill"] if __name__ == "__main__": unittest.main(verbosity=2) From 9a87329d690db98bd3594fb122f43de849e3c8b1 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 28 Feb 2013 10:58:19 -0500 Subject: [PATCH 025/115] More tests for test_other_methods() --- mwparserfromhell/string_mixin.py | 7 ++++--- tests/test_string_mixin.py | 40 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index ac47251..6490051 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -179,9 +179,10 @@ class StringMixIn(object): def isalpha(self): return self.__unicode__().isalpha() - @inheritdoc - def isdecimal(self): - return self.__unicode__().isdecimal() + if py3k: + @inheritdoc + def isdecimal(self): + return self.__unicode__().isdecimal() @inheritdoc def isdigit(self): diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 74da9ff..4e4fa68 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -174,7 +174,11 @@ class TestStringMixIn(unittest.TestCase): if not py3k: self.assertEquals(fstr, fstr.decode()) - self.assertEquals("𐌲𐌿𐍄", 
'\\U00010332\\U0001033f\\U00010344'.decode("unicode_escape")) + actual = '\\U00010332\\U0001033f\\U00010344' + self.assertEquals("𐌲𐌿𐍄", actual.decode("unicode_escape")) + self.assertEquals("𐌲", '\\U00010332'.decode("unicode_escape")) + self.assertRaises(UnicodeError, "fo".decode, "punycode") + self.assertEquals("", "fo".decode("punycode", "ignore")) self.assertEquals(b"fake string", fstr.encode()) self.assertEquals(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", @@ -187,8 +191,40 @@ class TestStringMixIn(unittest.TestCase): self.assertTrue(fstr.endswith("ing")) self.assertFalse(fstr.endswith("ingh")) + self.assertEquals("fake string", fstr) + self.assertEquals(" foobar", "\tfoobar".expandtabs()) + self.assertEquals(" foobar", "\tfoobar".expandtabs(4)) + + self.assertEquals(3, fstr.find("e")) + self.assertEquals(-1, fstr.find("z")) + self.assertEquals(7, fstr.find("r", 7)) + self.assertEquals(-1, fstr.find("r", 8)) + self.assertEquals(7, fstr.find("r", 5, 9)) + self.assertEquals(-1, fstr.find("r", 5, 7)) + + self.assertEquals("fake string", fstr.format()) + self.assertEquals("foobarbaz", "foo{0}baz".format("bar")) + self.assertEquals("foobarbaz", "foo{abc}baz".format(abc="bar")) + self.assertEquals("foobarbazbuzz", + "foo{0}{abc}buzz".format("bar", abc="baz")) + self.assertRaises(IndexError, "{0}{1}".format, "abc") + + self.assertEquals(3, fstr.index("e")) + self.assertRaises(ValueError, fstr.index, "z") + self.assertEquals(7, fstr.index("r", 7)) + self.assertRaises(ValueError, fstr.index, "r", 8) + self.assertEquals(7, fstr.index("r", 5, 9)) + self.assertRaises(ValueError, fstr.index, "r", 5, 7) + + self.assertTrue("foobar".isalnum()) + self.assertTrue("foobar123".isalnum()) + self.assertFalse("foo bar".isalnum()) + + self.assertTrue("foobar".isalpha()) + self.assertFalse("foobar123".isalpha()) + self.assertFalse("foo bar".isalpha()) + methods = [ - "expandtabs", "find", "format", "index", "isalnum", "isalpha", "isdecimal", "isdigit", "islower", "isnumeric", 
"isspace", "istitle", "isupper", "join", "ljust", "lstrip", "partition", "replace", "rfind", "rindex", "rjust", "rpartition", "rsplit", From 5a0a00ba98f0edde985239cc4717e70c0d37c618 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 3 Mar 2013 20:29:34 -0500 Subject: [PATCH 026/115] Change the way verify_safe() handles template params (#25). - Newlines are now allowed in template param names. - Changes also affect handling of arguments like {{{foo}}}. - Update unit tests: remove some unnecessary ones, and add some to cover the changes. - Update StringMixIn tests to actually work for some of the methods. - Update copyright notices for the C extensions. --- mwparserfromhell/parser/tokenizer.c | 2 +- mwparserfromhell/parser/tokenizer.h | 2 +- mwparserfromhell/parser/tokenizer.py | 70 ++--- tests/test_string_mixin.py | 166 ++++++----- tests/tokenizer/templates.mwtest | 540 +++++++---------------------------- 5 files changed, 235 insertions(+), 545 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index d82b080..6716698 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -1,6 +1,6 @@ /* Tokenizer for MWParserFromHell -Copyright (C) 2012 Ben Kurtovic +Copyright (C) 2012-2013 Ben Kurtovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index af86321..8d51013 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -1,6 +1,6 @@ /* Tokenizer Header File for MWParserFromHell -Copyright (C) 2012 Ben Kurtovic +Copyright (C) 2012-2013 Ben Kurtovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in diff --git a/mwparserfromhell/parser/tokenizer.py 
b/mwparserfromhell/parser/tokenizer.py index a365db8..67638ca 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -396,34 +396,42 @@ class Tokenizer(object): """Make sure we are not trying to write an invalid character.""" context = self._context if context & contexts.FAIL_NEXT: - self._fail_route() + return False if context & contexts.WIKILINK_TITLE: if this == "]" or this == "{": self._context |= contexts.FAIL_NEXT elif this == "\n" or this == "[" or this == "}": - self._fail_route() - return + return False + return True if context & contexts.TEMPLATE_NAME: if this == "{" or this == "}" or this == "[": self._context |= contexts.FAIL_NEXT - return + return True if this == "]": - self._fail_route() - return + return False if this == "|": - return - elif context & (contexts.TEMPLATE_PARAM_KEY | contexts.ARGUMENT_NAME): + return True + if context & contexts.HAS_TEXT: + if context & contexts.FAIL_ON_TEXT: + if this is self.END or not this.isspace(): + return False + else: + if this == "\n": + self._context |= contexts.FAIL_ON_TEXT + elif this is not self.END or not this.isspace(): + self._context |= contexts.HAS_TEXT + return True + else: if context & contexts.FAIL_ON_EQUALS: if this == "=": - self._fail_route() - return + return False elif context & contexts.FAIL_ON_LBRACE: - if this == "{": + if this == "{" or (self._read(-1) == self._read(-2) == "{"): if context & contexts.TEMPLATE: self._context |= contexts.FAIL_ON_EQUALS else: self._context |= contexts.FAIL_NEXT - return + return True self._context ^= contexts.FAIL_ON_LBRACE elif context & contexts.FAIL_ON_RBRACE: if this == "}": @@ -431,40 +439,13 @@ class Tokenizer(object): self._context |= contexts.FAIL_ON_EQUALS else: self._context |= contexts.FAIL_NEXT - return + return True self._context ^= contexts.FAIL_ON_RBRACE elif this == "{": self._context |= contexts.FAIL_ON_LBRACE elif this == "}": self._context |= contexts.FAIL_ON_RBRACE - if context & contexts.HAS_TEXT: 
- if context & contexts.FAIL_ON_TEXT: - if this is self.END or not this.isspace(): - if context & contexts.TEMPLATE_PARAM_KEY: - self._context ^= contexts.FAIL_ON_TEXT - self._context |= contexts.FAIL_ON_EQUALS - else: - self._fail_route() - return - else: - if this == "\n": - self._context |= contexts.FAIL_ON_TEXT - elif this is self.END or not this.isspace(): - self._context |= contexts.HAS_TEXT - - def _reset_safety_checks(self): - """Unset any safety-checking contexts set by Tokenizer_verify_safe(). - - Used when we preserve a context but previous data becomes invalid, like - when moving between template parameters. - """ - context = self._context - checks = (contexts.HAS_TEXT, contexts.FAIL_ON_TEXT, contexts.FAIL_NEXT, - contexts.FAIL_ON_LBRACE, contexts.FAIL_ON_RBRACE, - contexts.FAIL_ON_EQUALS) - for check in checks: - if context & check: - self._context ^= check; + return True def _parse(self, context=0): """Parse the wikicode string, using *context* for when to stop.""" @@ -474,12 +455,10 @@ class Tokenizer(object): unsafe = (contexts.TEMPLATE_NAME | contexts.WIKILINK_TITLE | contexts.TEMPLATE_PARAM_KEY | contexts.ARGUMENT_NAME) if self._context & unsafe: - try: - self._verify_safe(this) - except BadRoute: + if not self._verify_safe(this): if self._context & contexts.TEMPLATE_PARAM_KEY: self._pop() - raise + self._fail_route() if this not in self.MARKERS: self._write_text(this) self._head += 1 @@ -504,7 +483,6 @@ class Tokenizer(object): if self._context & contexts.FAIL_NEXT: self._context ^= contexts.FAIL_NEXT elif this == "|" and self._context & contexts.TEMPLATE: - self._reset_safety_checks() self._handle_template_param() elif this == "=" and self._context & contexts.TEMPLATE_PARAM_KEY: self._handle_template_param_value() diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 4e4fa68..43a9e9a 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -42,12 +42,14 @@ class TestStringMixIn(unittest.TestCase): methods 
= [ "capitalize", "center", "count", "encode", "endswith", "expandtabs", "find", "format", "index", "isalnum", "isalpha", - "isdecimal", "isdigit", "islower", "isnumeric", "isspace", - "istitle", "isupper", "join", "ljust", "lstrip", "partition", - "replace", "rfind", "rindex", "rjust", "rpartition", "rsplit", - "rstrip", "split", "splitlines", "startswith", "strip", "swapcase", - "title", "translate", "upper", "zfill"] - if not py3k: + "isdigit", "islower", "isnumeric", "isspace", "istitle", "isupper", + "join", "ljust", "lstrip", "partition", "replace", "rfind", + "rindex", "rjust", "rpartition", "rsplit", "rstrip", "split", + "splitlines", "startswith", "strip", "swapcase", "title", + "translate", "upper", "zfill"] + if py3k: + methods.append("isdecimal") + else: methods.append("decode") for meth in methods: expected = getattr(str, meth).__doc__ @@ -157,75 +159,107 @@ class TestStringMixIn(unittest.TestCase): def test_other_methods(self): """test the remaining non-magic methods of StringMixIn""" - fstr = _FakeString("fake string") - - self.assertEquals("Fake string", fstr.capitalize()) + str1 = _FakeString("fake string") + self.assertEquals("Fake string", str1.capitalize()) - self.assertEquals(" fake string ", fstr.center(15)) - self.assertEquals(" fake string ", fstr.center(16)) - self.assertEquals("qqfake stringqq", fstr.center(15, "q")) + self.assertEquals(" fake string ", str1.center(15)) + self.assertEquals(" fake string ", str1.center(16)) + self.assertEquals("qqfake stringqq", str1.center(15, "q")) - self.assertEquals(1, fstr.count("e")) - self.assertEquals(0, fstr.count("z")) - self.assertEquals(1, fstr.count("r", 7)) - self.assertEquals(0, fstr.count("r", 8)) - self.assertEquals(1, fstr.count("r", 5, 9)) - self.assertEquals(0, fstr.count("r", 5, 7)) + self.assertEquals(1, str1.count("e")) + self.assertEquals(0, str1.count("z")) + self.assertEquals(1, str1.count("r", 7)) + self.assertEquals(0, str1.count("r", 8)) + self.assertEquals(1, str1.count("r", 5, 
9)) + self.assertEquals(0, str1.count("r", 5, 7)) if not py3k: - self.assertEquals(fstr, fstr.decode()) - actual = '\\U00010332\\U0001033f\\U00010344' + str2 = _FakeString("fo") + self.assertEquals(str1, str1.decode()) + actual = _FakeString("\\U00010332\\U0001033f\\U00010344") self.assertEquals("𐌲𐌿𐍄", actual.decode("unicode_escape")) - self.assertEquals("𐌲", '\\U00010332'.decode("unicode_escape")) - self.assertRaises(UnicodeError, "fo".decode, "punycode") - self.assertEquals("", "fo".decode("punycode", "ignore")) + self.assertRaises(UnicodeError, str2.decode, "punycode") + self.assertEquals("", str2.decode("punycode", "ignore")) - self.assertEquals(b"fake string", fstr.encode()) + str3 = _FakeString("𐌲𐌿𐍄") + self.assertEquals(b"fake string", str1.encode()) self.assertEquals(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", - "𐌲𐌿𐍄".encode("utf8")) - self.assertRaises(UnicodeEncodeError, "𐌲𐌿𐍄".encode) - self.assertRaises(UnicodeEncodeError, "𐌲𐌿𐍄".encode, "ascii") - self.assertRaises(UnicodeEncodeError, "𐌲𐌿𐍄".encode, "ascii", "strict") - self.assertEquals("", "𐌲𐌿𐍄".encode("ascii", "ignore")) - - self.assertTrue(fstr.endswith("ing")) - self.assertFalse(fstr.endswith("ingh")) - - self.assertEquals("fake string", fstr) - self.assertEquals(" foobar", "\tfoobar".expandtabs()) - self.assertEquals(" foobar", "\tfoobar".expandtabs(4)) - - self.assertEquals(3, fstr.find("e")) - self.assertEquals(-1, fstr.find("z")) - self.assertEquals(7, fstr.find("r", 7)) - self.assertEquals(-1, fstr.find("r", 8)) - self.assertEquals(7, fstr.find("r", 5, 9)) - self.assertEquals(-1, fstr.find("r", 5, 7)) - - self.assertEquals("fake string", fstr.format()) - self.assertEquals("foobarbaz", "foo{0}baz".format("bar")) - self.assertEquals("foobarbaz", "foo{abc}baz".format(abc="bar")) - self.assertEquals("foobarbazbuzz", - "foo{0}{abc}buzz".format("bar", abc="baz")) - self.assertRaises(IndexError, "{0}{1}".format, "abc") - - self.assertEquals(3, fstr.index("e")) - self.assertRaises(ValueError, 
fstr.index, "z") - self.assertEquals(7, fstr.index("r", 7)) - self.assertRaises(ValueError, fstr.index, "r", 8) - self.assertEquals(7, fstr.index("r", 5, 9)) - self.assertRaises(ValueError, fstr.index, "r", 5, 7) - - self.assertTrue("foobar".isalnum()) - self.assertTrue("foobar123".isalnum()) - self.assertFalse("foo bar".isalnum()) - - self.assertTrue("foobar".isalpha()) - self.assertFalse("foobar123".isalpha()) - self.assertFalse("foo bar".isalpha()) + str3.encode("utf8")) + self.assertRaises(UnicodeEncodeError, str3.encode) + self.assertRaises(UnicodeEncodeError, str3.encode, "ascii") + self.assertRaises(UnicodeEncodeError, str3.encode, "ascii", "strict") + self.assertEquals("", str3.encode("ascii", "ignore")) + + self.assertTrue(str1.endswith("ing")) + self.assertFalse(str1.endswith("ingh")) + + str4 = _FakeString("\tfoobar") + self.assertEquals("fake string", str1) + self.assertEquals(" foobar", str4.expandtabs()) + self.assertEquals(" foobar", str4.expandtabs(4)) + + self.assertEquals(3, str1.find("e")) + self.assertEquals(-1, str1.find("z")) + self.assertEquals(7, str1.find("r", 7)) + self.assertEquals(-1, str1.find("r", 8)) + self.assertEquals(7, str1.find("r", 5, 9)) + self.assertEquals(-1, str1.find("r", 5, 7)) + + str5 = _FakeString("foo{0}baz") + str6 = _FakeString("foo{abc}baz") + str7 = _FakeString("foo{0}{abc}buzz") + str8 = _FakeString("{0}{1}") + self.assertEquals("fake string", str1.format()) + self.assertEquals("foobarbaz", str5.format("bar")) + self.assertEquals("foobarbaz", str6.format(abc="bar")) + self.assertEquals("foobarbazbuzz", str7.format("bar", abc="baz")) + self.assertRaises(IndexError, str8.format, "abc") + + self.assertEquals(3, str1.index("e")) + self.assertRaises(ValueError, str1.index, "z") + self.assertEquals(7, str1.index("r", 7)) + self.assertRaises(ValueError, str1.index, "r", 8) + self.assertEquals(7, str1.index("r", 5, 9)) + self.assertRaises(ValueError, str1.index, "r", 5, 7) + + str9 = _FakeString("foobar") + str10 = 
_FakeString("foobar123") + str11 = _FakeString("foo bar") + self.assertTrue(str9.isalnum()) + self.assertTrue(str10.isalnum()) + self.assertFalse(str11.isalnum()) + + self.assertTrue(str9.isalpha()) + self.assertFalse(str10.isalpha()) + self.assertFalse(str11.isalpha()) + + str12 = _FakeString("123") + str13 = _FakeString("\u2155") + str14 = _FakeString("\u00B2") + if py3k: + self.assertFalse(str9.isdecimal()) + self.assertTrue(str12.isdecimal()) + self.assertFalse(str13.isdecimal()) + self.assertFalse(str14.isdecimal()) + + self.assertFalse(str9.isdigit()) + self.assertTrue(str12.isdigit()) + self.assertFalse(str13.isdigit()) + self.assertTrue(str14.isdigit()) + + str15 = _FakeString("") + str16 = _FakeString("FooBar") + self.assertTrue(str9.islower()) + self.assertFalse(str15.islower()) + self.assertFalse(str16.islower()) + + self.assertFalse(str9.isnumeric()) + self.assertTrue(str12.isnumeric()) + self.assertTrue(str13.isnumeric()) + self.assertTrue(str14.isnumeric()) methods = [ - "isdecimal", "isdigit", "islower", "isnumeric", "isspace", + "isspace", "istitle", "isupper", "join", "ljust", "lstrip", "partition", "replace", "rfind", "rindex", "rjust", "rpartition", "rsplit", "rstrip", "split", "splitlines", "startswith", "strip", "swapcase", diff --git a/tests/tokenizer/templates.mwtest b/tests/tokenizer/templates.mwtest index d699ef2..fa3c0a4 100644 --- a/tests/tokenizer/templates.mwtest +++ b/tests/tokenizer/templates.mwtest @@ -215,521 +215,150 @@ output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(te --- -name: newline_start +name: newlines_start label: a newline at the start of a template name input: "{{\nfoobar}}" output: [TemplateOpen(), Text(text="\nfoobar"), TemplateClose()] --- -name: newline_end +name: newlines_end label: a newline at the end of a template name input: "{{foobar\n}}" output: [TemplateOpen(), Text(text="foobar\n"), TemplateClose()] --- -name: newline_start_end +name: newlines_start_end label: a newline at the 
start and end of a template name input: "{{\nfoobar\n}}" output: [TemplateOpen(), Text(text="\nfoobar\n"), TemplateClose()] --- -name: newline_mid +name: newlines_mid label: a newline at the middle of a template name input: "{{foo\nbar}}" output: [Text(text="{{foo\nbar}}")] --- -name: newline_start_mid +name: newlines_start_mid label: a newline at the start and middle of a template name input: "{{\nfoo\nbar}}" output: [Text(text="{{\nfoo\nbar}}")] --- -name: newline_mid_end +name: newlines_mid_end label: a newline at the middle and end of a template name input: "{{foo\nbar\n}}" output: [Text(text="{{foo\nbar\n}}")] --- -name: newline_start_mid_end +name: newlines_start_mid_end label: a newline at the start, middle, and end of a template name input: "{{\nfoo\nbar\n}}" output: [Text(text="{{\nfoo\nbar\n}}")] --- -name: newline_unnamed_param_start -label: a newline at the start of an unnamed template parameter -input: "{{foo|\nbar}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar"), TemplateClose()] - ---- - -name: newline_unnamed_param_end -label: a newline at the end of an unnamed template parameter -input: "{{foo|bar\n}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar\n"), TemplateClose()] - ---- - -name: newline_unnamed_param_start_end -label: a newline at the start and end of an unnamed template parameter -input: "{{foo|\nbar\n}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateClose()] - ---- - -name: newline_unnamed_param_start_mid -label: a newline at the start and middle of an unnamed template parameter -input: "{{foo|\nb\nar}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nb\nar"), TemplateClose()] - ---- - -name: newline_unnamed_param_mid_end -label: a newline at the middle and end of an unnamed template parameter -input: "{{foo|b\nar\n}}" -output: [TemplateOpen(), Text(text="foo"), 
TemplateParamSeparator(), Text(text="b\nar\n"), TemplateClose()] - ---- - -name: newline_unnamed_param_start_mid_end -label: a newline at the start, middle, and end of an unnamed template parameter +name: newlines_unnamed_param +label: newlines within an unnamed template parameter input: "{{foo|\nb\nar\n}}" output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateClose()] --- -name: newline_start_unnamed_param_start -label: a newline at the start of a template name and at the start of an unnamed template parameter -input: "{{\nfoo|\nbar}}" -output: [TemplateOpen(), Text(text="\nfoo"), TemplateParamSeparator(), Text(text="\nbar"), TemplateClose()] - ---- - -name: newline_start_unnamed_param_end -label: a newline at the start of a template name and at the end of an unnamed template parameter -input: "{{\nfoo|bar\n}}" -output: [TemplateOpen(), Text(text="\nfoo"), TemplateParamSeparator(), Text(text="bar\n"), TemplateClose()] - ---- - -name: newline_start_unnamed_param_start_end -label: a newline at the start of a template name and at the start and end of an unnamed template parameter -input: "{{\nfoo|\nbar\n}}" -output: [TemplateOpen(), Text(text="\nfoo"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateClose()] - ---- - -name: newline_start_unnamed_param_start_mid -label: a newline at the start of a template name and at the start and middle of an unnamed template parameter -input: "{{\nfoo|\nb\nar}}" -output: [TemplateOpen(), Text(text="\nfoo"), TemplateParamSeparator(), Text(text="\nb\nar"), TemplateClose()] - ---- - -name: newline_start_unnamed_param_mid_end -label: a newline at the start of a template name and at the middle and end of an unnamed template parameter -input: "{{\nfoo|b\nar\n}}" -output: [TemplateOpen(), Text(text="\nfoo"), TemplateParamSeparator(), Text(text="b\nar\n"), TemplateClose()] - ---- - -name: newline_start_unnamed_param_start_mid_end -label: a newline at the start of a template name and at 
the start, middle, and end of an unnamed template parameter -input: "{{\nfoo|\nb\nar\n}}" -output: [TemplateOpen(), Text(text="\nfoo"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateClose()] - ---- - -name: newline_end_unnamed_param_start -label: a newline at the end of a template name and at the start of an unnamed template parameter -input: "{{foo\n|\nbar}}" -output: [TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="\nbar"), TemplateClose()] - ---- - -name: newline_end_unnamed_param_end -label: a newline at the end of a template name and at the end of an unnamed template parameter -input: "{{foo\n|bar\n}}" -output: [TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="bar\n"), TemplateClose()] - ---- - -name: newline_end_unnamed_param_start_end -label: a newline at the end of a template name and at the start and end of an unnamed template parameter -input: "{{foo\n|\nbar\n}}" -output: [TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateClose()] - ---- - -name: newline_end_unnamed_param_start_mid -label: a newline at the end of a template name and at the start and middle of an unnamed template parameter -input: "{{foo\n|\nb\nar}}" -output: [TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="\nb\nar"), TemplateClose()] - ---- - -name: newline_end_unnamed_param_mid_end -label: a newline at the end of a template name and at the middle and end of an unnamed template parameter -input: "{{foo\n|b\nar\n}}" -output: [TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="b\nar\n"), TemplateClose()] - ---- - -name: newline_end_unnamed_param_start_mid_end -label: a newline at the end of a template name and at the start, middle, and end of an unnamed template parameter -input: "{{foo\n|\nb\nar\n}}" -output: [TemplateOpen(), Text(text="foo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateClose()] - ---- - -name: 
newline_start_end_unnamed_param_end -label: a newline at the start and end of a template name and the start of an unnamed template parameter -input: "{{\nfoo\n|\nbar}}" -output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nbar"), TemplateClose()] - ---- - -name: newline_start_end_unnamed_param_end -label: a newline at the start and end of a template name and the end of an unnamed template parameter -input: "{{\nfoo\n|bar\n}}" -output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="bar\n"), TemplateClose()] - ---- - -name: newline_start_end_unnamed_param_start_end -label: a newline at the start and end of a template name and the start and end of an unnamed template parameter -input: "{{\nfoo\n|\nbar\n}}" -output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateClose()] - ---- - -name: newline_start_end_unnamed_param_start_mid -label: a newline at the start and end of a template name and the start and middle of an unnamed template parameter -input: "{{\nfoo\n|\nb\nar}}" -output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar"), TemplateClose()] - ---- - -name: newline_start_end_unnamed_param_mid_end -label: a newline at the start and end of a template name and the middle and end of an unnamed template parameter -input: "{{\nfoo\n|b\nar\n}}" -output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="b\nar\n"), TemplateClose()] - ---- - -name: newline_start_end_unnamed_param_start_mid_end -label: a newline at the start and end of a template name and the start, middle, and end of an unnamed template parameter +name: newlines_enclose_template_name_unnamed_param +label: newlines enclosing a template name and within an unnamed template parameter input: "{{\nfoo\n|\nb\nar\n}}" output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateClose()] --- -name: 
newline_mid_unnamed_param_start -label: a newline at the middle of a template name and at the start of an unnamed template parameter -input: "{{f\noo|\nbar}}" -output: [Text(text="{{f\noo|\nbar}}")] - ---- - -name: newline_start_mid_unnamed_param_start -label: a newline at the start and middle of a template name and at the start of an unnamed template parameter -input: "{{\nf\noo|\nbar}}" -output: [Text(text="{{\nf\noo|\nbar}}")] - ---- - -name: newline_start_end_unnamed_param_start -label: a newline at the middle and of a template name and at the start of an unnamed template parameter -input: "{{f\noo\n|\nbar}}" -output: [Text(text="{{f\noo\n|\nbar}}")] - ---- - -name: newline_start_mid_end_unnamed_param_start -label: a newline at the start, middle, and end of a template name and at the start of an unnamed template parameter -input: "{{\nf\noo\n|\nbar}}" -output: [Text(text="{{\nf\noo\n|\nbar}}")] - ---- - -name: newline_named_param_value_start -label: a newline at the start of a named parameter value -input: "{{foo|1=\nbar}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="\nbar"), TemplateClose()] - ---- - -name: newline_named_param_value_end -label: a newline at the end of a named parameter value -input: "{{foo|1=bar\n}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="bar\n"), TemplateClose()] - ---- - -name: newline_named_param_value_start_end -label: a newline at the start and end of a named parameter value -input: "{{foo|1=\nbar\n}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="\nbar\n"), TemplateClose()] - ---- - -name: newline_named_param_value_start_mid -label: a newline at the start and middle of a named parameter value -input: "{{foo|1=\nb\nar}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="1"), 
TemplateParamEquals(), Text(text="\nb\nar"), TemplateClose()] - ---- - -name: newline_named_param_value_mid_end -label: a newline at the middle and end of a named parameter value -input: "{{foo|1=b\nar\n}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="b\nar\n"), TemplateClose()] - ---- - -name: newline_named_param_value_start_mid_end -label: a newline at the start, middle, and end of a named parameter value -input: "{{foo|1=\nb\nar\n}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="\nb\nar\n"), TemplateClose()] - ---- - -name: newline_named_param_name_start -label: a newline at the start of a parameter name -input: "{{foo|\nbar=baz}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] - ---- - -name: newline_named_param_name_end -label: a newline at the end of a parameter name -input: "{{foo|bar\n=baz}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar\n"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] - ---- - -name: newline_named_param_name_start_end -label: a newline at the start and end of a parameter name -input: "{{foo|\nbar\n=baz}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] +name: newlines_within_template_name_unnamed_param +label: newlines within a template name and within an unnamed template parameter +input: "{{\nfo\no\n|\nb\nar\n}}" +output: [Text(text="{{\nfo\no\n|\nb\nar\n}}")] --- -name: newline_named_param_name_mid -label: a newline at the middle of a parameter name -input: "{{foo|b\nar=baz}}" -output: [Text(text="{{foo|b\nar=baz}}")] +name: newlines_enclose_template_name_named_param_value +label: newlines enclosing a template name and within a named 
parameter value +input: "{{\nfoo\n|1=\nb\nar\n}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="1"), TemplateParamEquals(), Text(text="\nb\nar\n"), TemplateClose()] --- -name: newline_named_param_name_start_mid -label: a newline at the start and middle of a parameter name -input: "{{foo|\nb\nar=baz}}" -output: [Text(text="{{foo|\nb\nar=baz}}")] +name: newlines_within_template_name_named_param_value +label: newlines within a template name and within a named parameter value +input: "{{\nf\noo\n|1=\nb\nar\n}}" +output: [Text(text="{{\nf\noo\n|1=\nb\nar\n}}")] --- -name: newline_named_param_name_mid_end -label: a newline at the middle and end of a parameter name -input: "{{foo|b\nar\n=baz}}" -output: [Text(text="{{foo|b\nar\n=baz}}")] - ---- - -name: newline_named_param_name_start_mid_end -label: a newline at the start, middle, and end of a parameter name +name: newlines_named_param_name +label: newlines within a parameter name input: "{{foo|\nb\nar\n=baz}}" -output: [Text(text="{{foo|\nb\nar\n=baz}}")] - ---- - -name: newline_named_param_name_start_param_value_end -label: a newline at the start of a parameter name and the end of a parameter value -input: "{{foo|\nbar=baz\n}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar"), TemplateParamEquals(), Text(text="baz\n"), TemplateClose()] - ---- - -name: newline_named_param_name_end_param_value_end -label: a newline at the end of a parameter name and the end of a parameter value -input: "{{foo|bar\n=baz\n}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar\n"), TemplateParamEquals(), Text(text="baz\n"), TemplateClose()] - ---- - -name: newline_named_param_name_start_end_param_value_end -label: a newline at the start and end of a parameter name and the end of a parameter value -input: "{{foo|\nbar\n=baz\n}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar\n"), 
TemplateParamEquals(), Text(text="baz\n"), TemplateClose()] - ---- - -name: newline_named_param_name_start_mid_param_value_end -label: a newline at the start and middle of a parameter name and the end of a parameter value -input: "{{foo|\nb\nar=baz\n}}" -output: [Text(text="{{foo|\nb\nar=baz\n}}")] - ---- - -name: newline_named_param_name_mid_end_param_value_end -label: a newline at the middle and end of a parameter name and the end of a parameter value -input: "{{foo|b\nar\n=baz\n}}" -output: [Text(text="{{foo|b\nar\n=baz\n}}")] - ---- - -name: newline_named_param_name_start_mid_end_param_value_end -label: a newline at the start, middle, and end of a parameter name and at the end of a parameter value -input: "{{foo|\nb\nar\n=baz\n}}" -output: [Text(text="{{foo|\nb\nar\n=baz\n}}")] - ---- - -name: newline_named_param_name_start_param_value_start -label: a newline at the start of a parameter name and at the start of a parameter value -input: "{{foo|\nbar=\nbaz}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar"), TemplateParamEquals(), Text(text="\nbaz"), TemplateClose()] - ---- - -name: newline_named_param_name_end_param_value_start -label: a newline at the end of a parameter name and at the start of a parameter value -input: "{{foo|bar\n=\nbaz}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar\n"), TemplateParamEquals(), Text(text="\nbaz"), TemplateClose()] - ---- - -name: newline_named_param_name_start_end_param_value_start -label: a newline at the start and end of a parameter name and at the start of a parameter value -input: "{{foo|\nbar\n=\nbaz}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateParamEquals(), Text(text="\nbaz"), TemplateClose()] - ---- - -name: newline_named_param_name_start_mid_param_value_start -label: a newline at the start and middle of a parameter name and at the start of a parameter value -input: 
"{{foo|\nb\nar=\nbaz}}" -output: [Text(text="{{foo|\nb\nar=\nbaz}}")] - ---- - -name: newline_named_param_name_mid_end_param_value_start -label: a newline at the middle and end of a parameter name and at the start of a parameter value -input: "{{foo|b\nar\n=\nbaz}}" -output: [Text(text="{{foo|b\nar\n=\nbaz}}")] +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] --- -name: newline_named_param_name_start_mid_end_param_value_start -label: a newline at the start, middle, and end of a parameter name and at the start of a parameter value -input: "{{foo|\nb\nar\n=\nbaz}}" -output: [Text(text="{{foo|\nb\nar\n=\nbaz}}")] +name: newlines_named_param_name_param_value +label: newlines within a parameter name and within a parameter value +input: "{{foo|\nb\nar\n=\nba\nz\n}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="\nba\nz\n"), TemplateClose()] --- -name: newline_named_param_name_start_param_value_start_end -label: a newline at the start of a parameter name and at the start and end of a parameter value -input: "{{foo|\nbar=\nbaz\n}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar"), TemplateParamEquals(), Text(text="\nbaz\n"), TemplateClose()] +name: newlines_enclose_template_name_named_param_name +label: newlines enclosing a template name and within a parameter name +input: "{{\nfoo\n|\nb\nar\n=baz}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] --- -name: newline_named_param_name_end_param_value_start_end -label: a newline at the end of a parameter name and at the start and end of a parameter value -input: "{{foo|bar\n=\nbaz\n}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar\n"), TemplateParamEquals(), 
Text(text="\nbaz\n"), TemplateClose()] +name: newlines_enclose_template_name_named_param_name_param_value +label: newlines enclosing a template name and within a parameter name and within a parameter value +input: "{{\nfoo\n|\nb\nar\n=\nba\nz\n}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="\nba\nz\n"), TemplateClose()] --- -name: newline_named_param_name_start_end_param_value_start_end -label: a newline at the start and end of a parameter name and at the start and end of a parameter value -input: "{{foo|\nbar\n=\nbaz\n}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateParamEquals(), Text(text="\nbaz\n"), TemplateClose()] +name: newlines_within_template_name_named_param_name +label: newlines within a template name and within a parameter name +input: "{{\nfo\no\n|\nb\nar\n=baz}}" +output: [Text(text="{{\nfo\no\n|\nb\nar\n=baz}}")] --- -name: newline_named_param_name_start_mid_param_value_start_end -label: a newline at the start and middle of a parameter name and at the start and end of a parameter value -input: "{{foo|\nb\nar=\nbaz\n}}" -output: [Text(text="{{foo|\nb\nar=\nbaz\n}}")] +name: newlines_within_template_name_named_param_name_param_value +label: newlines within a template name and within a parameter name and within a parameter value +input: "{{\nf\noo\n|\nb\nar\n=\nba\nz\n}}" +output: [Text(text="{{\nf\noo\n|\nb\nar\n=\nba\nz\n}}")] --- -name: newline_named_param_name_mid_end_param_value_start_end -label: a newline at the middle and end of a parameter name and at the start and end of a parameter value -input: "{{foo|b\nar\n=\nbaz\n}}" -output: [Text(text="{{foo|b\nar\n=\nbaz\n}}")] - ---- - -name: newline_named_param_name_start_mid_end_param_value_start_end -label: a newline at the start, middle, and end of a parameter name and at the start and end of a parameter value -input: "{{foo|\nb\nar\n=\nbaz\n}}" -output: 
[Text(text="{{foo|\nb\nar\n=\nbaz\n}}")] - ---- - -name: newline_named_param_name_start_param_value_mid -label: a newline at the start of a parameter name and at the middle of a parameter value -input: "{{foo|\nbar=ba\nz}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar"), TemplateParamEquals(), Text(text="ba\nz"), TemplateClose()] - ---- - -name: newline_named_param_name_end_param_value_mid -label: a newline at the end of a parameter name and at the middle of a parameter value -input: "{{foo|bar\n=ba\nz}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar\n"), TemplateParamEquals(), Text(text="ba\nz"), TemplateClose()] - ---- - -name: newline_named_param_name_start_end_param_value_mid -label: a newline at the start and end of a parameter name and at the middle of a parameter value -input: "{{foo|\nbar\n=ba\nz}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateParamEquals(), Text(text="ba\nz"), TemplateClose()] - ---- - -name: newline_named_param_name_start_mid_param_value_mid -label: a newline at the start and middle of a parameter name and at the middle of a parameter value -input: "{{foo|\nb\nar=ba\nz}}" -output: [Text(text="{{foo|\nb\nar=ba\nz}}")] - ---- - -name: newline_named_param_name_mid_end_param_value_mid -label: a newline at the middle and end of a parameter name and at the middle of a parameter value -input: "{{foo|b\nar\n=ba\nz}}" -output: [Text(text="{{foo|b\nar\n=ba\nz}}")] - ---- - -name: newline_named_param_start_mid_end_param_value_mid -label: a newline at the start, middle, and end of a parameter name and at the middle of a parameter value -input: "{{foo|\nb\nar\n=ba\nz}}" -output: [Text(text="{{foo|\nb\nar\n=ba\nz}}")] +name: newlines_wildcard +label: a random, complex assortment of templates and newlines +input: "{{\nfoo\n|\nb\nar\n=\nb\naz\n|\nb\nuz\n}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), 
TemplateParamSeparator(), Text(text="\nb\nar\n"), TemplateParamEquals(), Text(text="\nb\naz\n"), TemplateParamSeparator(), Text(text="\nb\nuz\n"), TemplateClose()] --- -name: newline_wildcard -label: a random, complex assortment of templates and newlines -input: "{{\nfoo\n|\nbar\n=\nb\naz\n|\nb\nuz\n}}" -output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\nbar\n"), TemplateParamEquals(), Text(text="\nb\naz\n"), TemplateParamSeparator(), Text(text="\nb\nuz\n"), TemplateClose()] +name: newlines_wildcard_redux +label: an even more random and complex assortment of templates and newlines +input: "{{\nfoo\n|\n{{\nbar\n|\nb\naz\n=\nb\niz\n}}\n=\nb\nuzz\n}}" +output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\n"), TemplateOpen(), Text(text="\nbar\n"), TemplateParamSeparator(), Text(text="\nb\naz\n"), TemplateParamEquals(), Text(text="\nb\niz\n"), TemplateClose(), Text(text="\n"), TemplateParamEquals(), Text(text="\nb\nuzz\n"), TemplateClose()] --- -name: newline_wildcard_redux -label: an even more random and complex assortment of templates and newlines -input: "{{\nfoo\n|\n{{\nbar\n|\nbaz\n=\nb\niz\n}}\n=\nb\nuzz\n}}" -output: [TemplateOpen(), Text(text="\nfoo\n"), TemplateParamSeparator(), Text(text="\n"), TemplateOpen(), Text(text="\nbar\n"), TemplateParamSeparator(), Text(text="\nbaz\n"), TemplateParamEquals(), Text(text="\nb\niz\n"), TemplateClose(), Text(text="\n"), TemplateParamEquals(), Text(text="\nb\nuzz\n"), TemplateClose()] +name: newlines_wildcard_redux_invalid +label: a variation of the newlines_wildcard_redux test that is invalid +input: "{{\nfoo\n|\n{{\nb\nar\n|\nb\naz\n=\nb\niz\n}}\n=\nb\nuzz\n}}" +output: [Text(text="{{\nfoo\n|\n{{\nb\nar\n|\nb\naz\n=\nb\niz\n}}\n=\nb\nuzz\n}}")] --- @@ -812,8 +441,43 @@ output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text=" name: valid_param_name_brackets label: valid characters in unnamed template parameter: left and right 
brackets -input: "{{foo|ba[r]}}" -output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba[r]"), TemplateClose()] +input: "{{foo|ba[r]=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="ba[r]"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] + +--- + +name: valid_param_name_double_left_brackets +label: valid characters in unnamed template parameter: double left brackets +input: "{{foo|bar[[in\nvalid=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar[[in\nvalid"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] + +--- + +name: valid_param_name_double_right_brackets +label: valid characters in unnamed template parameter: double right brackets +input: "{{foo|bar]]=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar]]"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] + +--- + +name: valid_param_name_double_brackets +label: valid characters in unnamed template parameter: double left and right brackets +input: "{{foo|bar[[in\nvalid]]=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar[[in\nvalid]]"), TemplateParamEquals(), Text(text="baz"), TemplateClose()] + +--- + +name: invalid_param_name_double_left_braces +label: invalid characters in template parameter name: double left braces +input: "{{foo|bar{{in\nvalid=baz}}" +output: [Text(text="{{foo|bar{{in\nvalid=baz}}")] + +--- + +name: invalid_param_name_double_braces +label: invalid characters in template parameter name: double left and right braces +input: "{{foo|bar{{in\nvalid}}=baz}}" +output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text="bar{{in\nvalid"), TemplateClose(), Text(text="=baz}}")] --- @@ -919,3 +583,17 @@ name: incomplete_two_named_params label: incomplete templates that should fail gracefully: two named parameters with values input: "{{stuff}} {{foo|bar=baz|biz=buzz" 
output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar=baz|biz=buzz")] + +--- + +name: incomplete_nested_template_as_unnamed_param +label: incomplete templates that should fail gracefully: a valid nested template as an unnamed parameter +input: "{{stuff}} {{foo|{{bar}}" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|"), TemplateOpen(), Text(text="bar"), TemplateClose()] + +--- + +name: incomplete_nested_template_as_param_value +label: incomplete templates that should fail gracefully: a valid nested template as a parameter value +input: "{{stuff}} {{foo|bar={{baz}}" +output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar="), TemplateOpen(), Text(text="baz"), TemplateClose()] From 718fcb24c86415a5ec4f597d63dbe71ce3a49fea Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 8 Mar 2013 23:08:49 -0500 Subject: [PATCH 027/115] Fix eight failing tests; all template parsing tests now passing (#25). --- mwparserfromhell/parser/tokenizer.c | 90 +++++++++++++------------------------ mwparserfromhell/parser/tokenizer.h | 3 +- 2 files changed, 31 insertions(+), 62 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 6716698..3e6527a 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -1135,48 +1135,59 @@ Tokenizer_parse_comment(Tokenizer* self) } /* - Make sure we are not trying to write an invalid character. + Make sure we are not trying to write an invalid character. Return 0 if + everything is safe, or -1 if the route must be failed. 
*/ -static void +static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) { if (context & LC_FAIL_NEXT) { - Tokenizer_fail_route(self); - return; + return -1; } if (context & LC_WIKILINK_TITLE) { if (data == *"]" || data == *"{") self->topstack->context |= LC_FAIL_NEXT; else if (data == *"\n" || data == *"[" || data == *"}") - Tokenizer_fail_route(self); - return; + return -1; + return 0; } if (context & LC_TEMPLATE_NAME) { if (data == *"{" || data == *"}" || data == *"[") { self->topstack->context |= LC_FAIL_NEXT; - return; + return 0; } if (data == *"]") { - Tokenizer_fail_route(self); - return; + return -1; } if (data == *"|") - return; + return 0; + + if (context & LC_HAS_TEXT) { + if (context & LC_FAIL_ON_TEXT) { + if (!Py_UNICODE_ISSPACE(data)) + return -1; + } + else { + if (data == *"\n") + self->topstack->context |= LC_FAIL_ON_TEXT; + } + } + else if (!Py_UNICODE_ISSPACE(data)) + self->topstack->context |= LC_HAS_TEXT; } - else if (context & (LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME)) { + else { if (context & LC_FAIL_ON_EQUALS) { if (data == *"=") { - Tokenizer_fail_route(self); - return; + return -1; } } else if (context & LC_FAIL_ON_LBRACE) { - if (data == *"{") { + if (data == *"{" || (Tokenizer_READ(self, -1) == *"{" && Tokenizer_READ(self, -2) == *"{")) { if (context & LC_TEMPLATE) self->topstack->context |= LC_FAIL_ON_EQUALS; else self->topstack->context |= LC_FAIL_NEXT; - return; + return 0; } self->topstack->context ^= LC_FAIL_ON_LBRACE; } @@ -1186,7 +1197,7 @@ Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) self->topstack->context |= LC_FAIL_ON_EQUALS; else self->topstack->context |= LC_FAIL_NEXT; - return; + return 0; } self->topstack->context ^= LC_FAIL_ON_RBRACE; } @@ -1195,47 +1206,7 @@ Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) else if (data == *"}") self->topstack->context |= LC_FAIL_ON_RBRACE; } - if (context & LC_HAS_TEXT) { - if (context & LC_FAIL_ON_TEXT) { - if 
(!Py_UNICODE_ISSPACE(data)) { - if (context & LC_TEMPLATE_PARAM_KEY) { - self->topstack->context ^= LC_FAIL_ON_TEXT; - self->topstack->context |= LC_FAIL_ON_EQUALS; - } - else - Tokenizer_fail_route(self); - return; - } - } - else { - if (data == *"\n") - self->topstack->context |= LC_FAIL_ON_TEXT; - } - } - else if (!Py_UNICODE_ISSPACE(data)) - self->topstack->context |= LC_HAS_TEXT; -} - -/* - Unset any safety-checking contexts set by Tokenizer_verify_safe(). Used - when we preserve a context but previous data becomes invalid, like when - moving between template parameters. -*/ -static void -Tokenizer_reset_safety_checks(Tokenizer* self) -{ - static int checks[] = { - LC_HAS_TEXT, LC_FAIL_ON_TEXT, LC_FAIL_NEXT, LC_FAIL_ON_LBRACE, - LC_FAIL_ON_RBRACE, LC_FAIL_ON_EQUALS, 0}; - int context = self->topstack->context, i = 0, this; - while (1) { - this = checks[i]; - if (!this) - return; - if (context & this) - self->topstack->context ^= this; - i++; - } + return 0; } /* @@ -1258,12 +1229,12 @@ Tokenizer_parse(Tokenizer* self, int context) this = Tokenizer_READ(self, 0); this_context = self->topstack->context; if (this_context & unsafe_contexts) { - Tokenizer_verify_safe(self, this_context, this); - if (BAD_ROUTE) { + if (Tokenizer_verify_safe(self, this_context, this) < 0) { if (this_context & LC_TEMPLATE_PARAM_KEY) { trash = Tokenizer_pop(self); Py_XDECREF(trash); } + Tokenizer_fail_route(self); return NULL; } } @@ -1303,7 +1274,6 @@ Tokenizer_parse(Tokenizer* self, int context) self->topstack->context ^= LC_FAIL_NEXT; } else if (this == *"|" && this_context & LC_TEMPLATE) { - Tokenizer_reset_safety_checks(self); if (Tokenizer_handle_template_param(self)) return NULL; } diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index 8d51013..d4b42f5 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -206,8 +206,7 @@ static HeadingData* Tokenizer_handle_heading_end(Tokenizer*); static int 
Tokenizer_really_parse_entity(Tokenizer*); static int Tokenizer_parse_entity(Tokenizer*); static int Tokenizer_parse_comment(Tokenizer*); -static void Tokenizer_verify_safe(Tokenizer*, int, Py_UNICODE); -static void Tokenizer_reset_safety_checks(Tokenizer*); +static int Tokenizer_verify_safe(Tokenizer*, int, Py_UNICODE); static PyObject* Tokenizer_parse(Tokenizer*, int); static PyObject* Tokenizer_tokenize(Tokenizer*, PyObject*); From 054a84afe09681974d8438c70c619b3a40be59ee Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 8 Mar 2013 23:17:08 -0500 Subject: [PATCH 028/115] A bit of misc cleanup. --- mwparserfromhell/parser/tokenizer.c | 40 ++++++++++++++++++++----------------- mwparserfromhell/parser/tokenizer.h | 4 ++++ 2 files changed, 26 insertions(+), 18 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 3e6527a..8c96500 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -843,7 +843,8 @@ Tokenizer_handle_heading_end(Tokenizer* self) self->head++; } current = log2(self->topstack->context / LC_HEADING_LEVEL_1) + 1; - level = current > best ? (best > 6 ? 6 : best) : (current > 6 ? 6 : current); + level = current > best ? (best > 6 ? 6 : best) : + (current > 6 ? 
6 : current); after = (HeadingData*) Tokenizer_parse(self, self->topstack->context); if (BAD_ROUTE) { RESET_ROUTE(); @@ -956,11 +957,11 @@ Tokenizer_really_parse_entity(Tokenizer* self) else numeric = hexadecimal = 0; if (hexadecimal) - valid = "0123456789abcdefABCDEF"; + valid = HEXDIGITS; else if (numeric) - valid = "0123456789"; + valid = DIGITS; else - valid = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; + valid = ALPHANUM; text = calloc(MAX_ENTITY_SIZE, sizeof(char)); if (!text) { PyErr_NoMemory(); @@ -1005,7 +1006,7 @@ Tokenizer_really_parse_entity(Tokenizer* self) i = 0; while (1) { def = entitydefs[i]; - if (!def) // We've reached the end of the def list without finding it + if (!def) // We've reached the end of the defs without finding it FAIL_ROUTE_AND_EXIT() if (strcmp(text, def) == 0) break; @@ -1161,7 +1162,6 @@ Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) } if (data == *"|") return 0; - if (context & LC_HAS_TEXT) { if (context & LC_FAIL_ON_TEXT) { if (!Py_UNICODE_ISSPACE(data)) @@ -1182,7 +1182,8 @@ Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) } } else if (context & LC_FAIL_ON_LBRACE) { - if (data == *"{" || (Tokenizer_READ(self, -1) == *"{" && Tokenizer_READ(self, -2) == *"{")) { + if (data == *"{" || (Tokenizer_READ(self, -1) == *"{" && + Tokenizer_READ(self, -2) == *"{")) { if (context & LC_TEMPLATE) self->topstack->context |= LC_FAIL_ON_EQUALS; else @@ -1375,7 +1376,8 @@ Tokenizer_tokenize(Tokenizer* self, PyObject* args) PyMODINIT_FUNC init_tokenizer(void) { - PyObject *module, *tempmodule, *defmap, *deflist, *globals, *locals, *fromlist, *modname; + PyObject *module, *tempmod, *defmap, *deflist, *globals, *locals, + *fromlist, *modname; unsigned numdefs, i; char *name; @@ -1386,13 +1388,13 @@ init_tokenizer(void) Py_INCREF(&TokenizerType); PyModule_AddObject(module, "CTokenizer", (PyObject*) &TokenizerType); - tempmodule = PyImport_ImportModule("htmlentitydefs"); - if 
(!tempmodule) + tempmod = PyImport_ImportModule("htmlentitydefs"); + if (!tempmod) return; - defmap = PyObject_GetAttrString(tempmodule, "entitydefs"); + defmap = PyObject_GetAttrString(tempmod, "entitydefs"); if (!defmap) return; - Py_DECREF(tempmodule); + Py_DECREF(tempmod); deflist = PyDict_Keys(defmap); if (!deflist) return; @@ -1416,18 +1418,20 @@ init_tokenizer(void) if (!modname) return; PyList_SET_ITEM(fromlist, 0, modname); - tempmodule = PyImport_ImportModuleLevel(name, globals, locals, fromlist, 0); + tempmod = PyImport_ImportModuleLevel(name, globals, locals, fromlist, 0); Py_DECREF(fromlist); - if (!tempmodule) + if (!tempmod) return; - tokens = PyObject_GetAttrString(tempmodule, "tokens"); - Py_DECREF(tempmodule); + tokens = PyObject_GetAttrString(tempmod, "tokens"); + Py_DECREF(tempmod); Text = PyObject_GetAttrString(tokens, "Text"); TemplateOpen = PyObject_GetAttrString(tokens, "TemplateOpen"); - TemplateParamSeparator = PyObject_GetAttrString(tokens, "TemplateParamSeparator"); - TemplateParamEquals = PyObject_GetAttrString(tokens, "TemplateParamEquals"); + TemplateParamSeparator = PyObject_GetAttrString(tokens, + "TemplateParamSeparator"); + TemplateParamEquals = PyObject_GetAttrString(tokens, + "TemplateParamEquals"); TemplateClose = PyObject_GetAttrString(tokens, "TemplateClose"); ArgumentOpen = PyObject_GetAttrString(tokens, "ArgumentOpen"); diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index d4b42f5..693538c 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -36,6 +36,10 @@ SOFTWARE. 
#define malloc PyObject_Malloc #define free PyObject_Free +#define DIGITS "0123456789" +#define HEXDIGITS "0123456789abcdefABCDEF" +#define ALPHANUM "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + static const char* MARKERS[] = { "{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":", "/", "-", "!", "\n", ""}; From fb71f5507eca7bc73fae764549a7579889817cba Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 14 Mar 2013 10:51:04 -0400 Subject: [PATCH 029/115] Support a 'use_c' field to explicitly disable the C tokenizer. --- mwparserfromhell/parser/__init__.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/mwparserfromhell/parser/__init__.py b/mwparserfromhell/parser/__init__.py index 3f034f6..1fb95b5 100644 --- a/mwparserfromhell/parser/__init__.py +++ b/mwparserfromhell/parser/__init__.py @@ -27,12 +27,15 @@ joins them together under one interface. """ from .builder import Builder +from .tokenizer import Tokenizer try: - from ._tokenizer import CTokenizer as Tokenizer + from ._tokenizer import CTokenizer + use_c = True except ImportError: - from .tokenizer import Tokenizer + CTokenizer = None + use_c = False -__all__ = ["Parser"] +__all__ = ["use_c", "Parser"] class Parser(object): """Represents a parser for wikicode. @@ -45,7 +48,10 @@ class Parser(object): def __init__(self, text): self.text = text - self._tokenizer = Tokenizer() + if use_c and CTokenizer: + self._tokenizer = CTokenizer() + else: + self._tokenizer = Tokenizer() self._builder = Builder() def parse(self): From 57d48f1adb8969e6a210fff5c613831d5e70b5e7 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 16 Mar 2013 18:15:00 -0400 Subject: [PATCH 030/115] More tests for StringMixIns. 
--- tests/test_string_mixin.py | 78 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 72 insertions(+), 6 deletions(-) diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 43a9e9a..c0fe99d 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -258,12 +258,78 @@ class TestStringMixIn(unittest.TestCase): self.assertTrue(str13.isnumeric()) self.assertTrue(str14.isnumeric()) - methods = [ - "isspace", - "istitle", "isupper", "join", "ljust", "lstrip", "partition", - "replace", "rfind", "rindex", "rjust", "rpartition", "rsplit", - "rstrip", "split", "splitlines", "startswith", "strip", "swapcase", - "title", "translate", "upper", "zfill"] + str17 = _FakeString(" ") + str18 = _FakeString("\t \t \r\n") + self.assertFalse(str1.isspace()) + self.assertFalse(str9.isspace()) + self.assertTrue(str17.isspace()) + self.assertTrue(str18.isspace()) + + str19 = _FakeString("This Sentence Looks Like A Title") + str20 = _FakeString("This sentence doesn't LookLikeATitle") + self.assertFalse(str15.istitle()) + self.assertTrue(str19.istitle()) + self.assertFalse(str20.istitle()) + + str21 = _FakeString("FOOBAR") + self.assertFalse(str9.isupper()) + self.assertFalse(str15.isupper()) + self.assertTrue(str21.isupper()) + + self.assertEquals("foobar", str15.join(["foo", "bar"])) + self.assertEquals("foo123bar123baz", str12.join(("foo", "bar", "baz"))) + + self.assertEquals("fake string ", str1.ljust(15)) + self.assertEquals("fake string ", str1.ljust(16)) + self.assertEquals("fake stringqqqq", str1.ljust(15, "q")) + + # lstrip + + # partition + + # replace + + self.assertEquals(3, str1.rfind("e")) + self.assertEquals(-1, str1.rfind("z")) + self.assertEquals(7, str1.rfind("r", 7)) + self.assertEquals(-1, str1.rfind("r", 8)) + self.assertEquals(7, str1.rfind("r", 5, 9)) + self.assertEquals(-1, str1.rfind("r", 5, 7)) + + self.assertEquals(3, str1.rindex("e")) + self.assertRaises(ValueError, str1.rindex, "z") + self.assertEquals(7, 
str1.rindex("r", 7)) + self.assertRaises(ValueError, str1.rindex, "r", 8) + self.assertEquals(7, str1.rindex("r", 5, 9)) + self.assertRaises(ValueError, str1.rindex, "r", 5, 7) + + self.assertEquals(" fake string", str1.rjust(15)) + self.assertEquals(" fake string", str1.rjust(16)) + self.assertEquals("qqqqfake string", str1.rjust(15, "q")) + + # rpartition + + # rsplit + + # rstrip + + # split + + # splitlines + + # startswith + + # strip + + # swapcase + + # title + + # translate + + # upper + + # zfill if __name__ == "__main__": unittest.main(verbosity=2) From 6704bcd6621e73352b1a138c24d65e6c2ca09b56 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 16 Mar 2013 19:39:56 -0400 Subject: [PATCH 031/115] Document the remaining universal methods. Still need to do some py3k-only ones. --- mwparserfromhell/string_mixin.py | 11 ++-- tests/test_string_mixin.py | 117 +++++++++++++++++++++++++++------------ 2 files changed, 89 insertions(+), 39 deletions(-) diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index 6490051..2539f74 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -179,10 +179,9 @@ class StringMixIn(object): def isalpha(self): return self.__unicode__().isalpha() - if py3k: - @inheritdoc - def isdecimal(self): - return self.__unicode__().isdecimal() + @inheritdoc + def isdecimal(self): + return self.__unicode__().isdecimal() @inheritdoc def isdigit(self): @@ -231,7 +230,9 @@ class StringMixIn(object): return self.__unicode__().partition(sep) @inheritdoc - def replace(self, old, new, count): + def replace(self, old, new, count=None): + if count is None: + return self.__unicode__().replace(old, new) return self.__unicode__().replace(old, new, count) @inheritdoc diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index c0fe99d..455f2e6 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -42,13 +42,13 @@ class TestStringMixIn(unittest.TestCase): 
methods = [ "capitalize", "center", "count", "encode", "endswith", "expandtabs", "find", "format", "index", "isalnum", "isalpha", - "isdigit", "islower", "isnumeric", "isspace", "istitle", "isupper", - "join", "ljust", "lstrip", "partition", "replace", "rfind", - "rindex", "rjust", "rpartition", "rsplit", "rstrip", "split", - "splitlines", "startswith", "strip", "swapcase", "title", - "translate", "upper", "zfill"] + "isdecimal", "isdigit", "islower", "isnumeric", "isspace", + "istitle", "isupper", "join", "ljust", "lower", "lstrip", + "partition", "replace", "rfind", "rindex", "rjust", "rpartition", + "rsplit", "rstrip", "split", "splitlines", "startswith", "strip", + "swapcase", "title", "translate", "upper", "zfill"] if py3k: - methods.append("isdecimal") + methods.extend(["casefold", "format_map", "isidentifier", "isprintable", "maketrans"]) else: methods.append("decode") for meth in methods: @@ -236,11 +236,10 @@ class TestStringMixIn(unittest.TestCase): str12 = _FakeString("123") str13 = _FakeString("\u2155") str14 = _FakeString("\u00B2") - if py3k: - self.assertFalse(str9.isdecimal()) - self.assertTrue(str12.isdecimal()) - self.assertFalse(str13.isdecimal()) - self.assertFalse(str14.isdecimal()) + self.assertFalse(str9.isdecimal()) + self.assertTrue(str12.isdecimal()) + self.assertFalse(str13.isdecimal()) + self.assertFalse(str14.isdecimal()) self.assertFalse(str9.isdigit()) self.assertTrue(str12.isdigit()) @@ -283,11 +282,20 @@ class TestStringMixIn(unittest.TestCase): self.assertEquals("fake string ", str1.ljust(16)) self.assertEquals("fake stringqqqq", str1.ljust(15, "q")) - # lstrip + self.assertEquals("", str15.lower()) + self.assertEquals("foobar", str16.lower()) + + str22 = _FakeString(" fake string ") + self.assertEquals("fake string", str1.lstrip()) + self.assertEquals("fake string ", str22.lstrip()) + self.assertEquals("ke string", str1.lstrip("abcdef")) - # partition + self.assertEquals(("fa", "ke", " string"), str1.partition("ke")) + 
self.assertEquals(("fake string", "", ""), str1.partition("asdf")) - # replace + str23 = _FakeString("boo foo moo") + self.assertEquals("real string", str1.replace("fake", "real")) + self.assertEquals("bu fu moo", str23.replace("oo", "u", 2)) self.assertEquals(3, str1.rfind("e")) self.assertEquals(-1, str1.rfind("z")) @@ -307,29 +315,70 @@ class TestStringMixIn(unittest.TestCase): self.assertEquals(" fake string", str1.rjust(16)) self.assertEquals("qqqqfake string", str1.rjust(15, "q")) - # rpartition - - # rsplit - - # rstrip - - # split - - # splitlines + self.assertEquals(("fa", "ke", " string"), str1.rpartition("ke")) + self.assertEquals(("", "", "fake string"), str1.rpartition("asdf")) + + str24 = _FakeString(" this is a sentence with whitespace ") + actual = ["this", "is", "a", "sentence", "with", "whitespace"] + self.assertEquals(actual, str24.rsplit()) + self.assertEquals(actual, str24.rsplit(None)) + actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with", + "", "whitespace", ""] + self.assertEquals(actual, str24.rsplit(" ")) + actual = [" this is a", "sentence", "with", "whitespace"] + self.assertEquals(actual, str24.rsplit(None, 3)) + actual = [" this is a sentence with", "", "whitespace", ""] + self.assertEquals(actual, str24.rsplit(" ", 3)) + + self.assertEquals("fake string", str1.rstrip()) + self.assertEquals(" fake string", str22.rstrip()) + self.assertEquals("fake stri", str1.rstrip("ngr")) + + actual = ["this", "is", "a", "sentence", "with", "whitespace"] + self.assertEquals(actual, str24.split()) + self.assertEquals(actual, str24.split(None)) + actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with", + "", "whitespace", ""] + self.assertEquals(actual, str24.split(" ")) + actual = ["this", "is", "a", "sentence with whitespace "] + self.assertEquals(actual, str24.split(None, 3)) + actual = ["", "", "", "this is a sentence with whitespace "] + self.assertEquals(actual, str24.split(" ", 3)) + + str25 = 
_FakeString("lines\nof\ntext\r\nare\r\npresented\nhere") + self.assertEquals(["lines", "of", "text", "are", "presented", "here"], + str25.splitlines()) + self.assertEquals(["lines\n", "of\n", "text\r\n", "are\r\n", + "presented\n", "here"], str25.splitlines(True)) + + self.assertTrue(str1.startswith("fake")) + self.assertFalse(str1.startswith("faker")) + + self.assertEquals("fake string", str1.strip()) + self.assertEquals("fake string", str22.strip()) + self.assertEquals("ke stri", str1.strip("abcdefngr")) + + self.assertEquals("fOObAR", str16.swapcase()) + + self.assertEquals("Fake String", str1.title()) - # startswith - - # strip - - # swapcase - - # title - - # translate + if py3k: + table1 = str.maketrans({97: "1", 101: "2", 105: "3", 111: "4", + 117: "5"}) + table2 = str.maketrans("aeiou", "12345") + table3 = str.maketrans("aeiou", "12345", "rts") + self.assertEquals("f1k2 str3ng", str1.translate(table1)) + self.assertEquals("f1k2 str3ng", str1.translate(table2)) + self.assertEquals("f1k2 3ng", str1.translate(table3)) + else: + table = {97: "1", 101: "2", 105: "3", 111: "4", 117: "5"} + self.assertEquals("f1k2 str3ng", str1.translate(table)) - # upper + self.assertEquals("", str15.upper()) + self.assertEquals("FOOBAR", str16.upper()) - # zfill + self.assertEquals("123", str12.zfill(3)) + self.assertEquals("000123", str12.zfill(6)) if __name__ == "__main__": unittest.main(verbosity=2) From cf14b5ef4e02dadcba08083e62ceb800ec9edb6d Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 16 Mar 2013 19:55:25 -0400 Subject: [PATCH 032/115] Add some missing methods to StringMixIn. 
--- mwparserfromhell/string_mixin.py | 30 ++++++++++++++++++++++++++++++ tests/test_string_mixin.py | 3 ++- 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index 2539f74..9e6d551 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -122,6 +122,11 @@ class StringMixIn(object): def capitalize(self): return self.__unicode__().capitalize() + if py3k: + @inheritdoc + def casefold(self): + return self.__unicode__().casefold() + @inheritdoc def center(self, width, fillchar=None): if fillchar is None: @@ -167,6 +172,11 @@ class StringMixIn(object): def format(self, *args, **kwargs): return self.__unicode__().format(*args, **kwargs) + if py3k: + @inheritdoc + def format_map(self, mapping): + return self.__unicode__().format_map(mapping) + @inheritdoc def index(self, sub, start=None, end=None): return self.__unicode__().index(sub, start, end) @@ -187,6 +197,11 @@ class StringMixIn(object): def isdigit(self): return self.__unicode__().isdigit() + if py3k: + @inheritdoc + def isidentifier(self): + return self.__unicode__().isidentifier() + @inheritdoc def islower(self): return self.__unicode__().islower() @@ -195,6 +210,11 @@ class StringMixIn(object): def isnumeric(self): return self.__unicode__().isnumeric() + if py3k: + @inheritdoc + def isprintable(self): + return self.__unicode__().isprintable() + @inheritdoc def isspace(self): return self.__unicode__().isspace() @@ -225,6 +245,16 @@ class StringMixIn(object): def lstrip(self, chars=None): return self.__unicode__().lstrip(chars) + if py3k: + @inheritdoc + @staticmethod + def maketrans(self, x, y=None, z=None): + if z is None: + if y is None: + return self.__unicode__.maketrans(x) + return self.__unicode__.maketrans(x, y) + return self.__unicode__.maketrans(x, y, z) + @inheritdoc def partition(self, sep): return self.__unicode__().partition(sep) diff --git a/tests/test_string_mixin.py 
b/tests/test_string_mixin.py index 455f2e6..cff3848 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -48,7 +48,8 @@ class TestStringMixIn(unittest.TestCase): "rsplit", "rstrip", "split", "splitlines", "startswith", "strip", "swapcase", "title", "translate", "upper", "zfill"] if py3k: - methods.extend(["casefold", "format_map", "isidentifier", "isprintable", "maketrans"]) + methods.extend(["casefold", "format_map", "isidentifier", + "isprintable", "maketrans"]) else: methods.append("decode") for meth in methods: From 0af5894647c96d3d7cd2273aa2ddf30c864cdbca Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 18 Mar 2013 02:44:25 -0400 Subject: [PATCH 033/115] Finish tests for py3k-only methods in TestStringMixIn. --- tests/test_string_mixin.py | 65 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 20 deletions(-) diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index cff3848..bf49629 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -216,6 +216,11 @@ class TestStringMixIn(unittest.TestCase): self.assertEquals("foobarbazbuzz", str7.format("bar", abc="baz")) self.assertRaises(IndexError, str8.format, "abc") + if py3k: + self.assertEquals("fake string", str1.format_map({})) + self.assertEquals("foobarbaz", str6.format_map({"abc": "bar"})) + self.assertRaises(ValueError, str5.format_map, {0: "abc"}) + self.assertEquals(3, str1.index("e")) self.assertRaises(ValueError, str1.index, "z") self.assertEquals(7, str1.index("r", 7)) @@ -247,6 +252,12 @@ class TestStringMixIn(unittest.TestCase): self.assertFalse(str13.isdigit()) self.assertTrue(str14.isdigit()) + if py3k: + self.assertTrue(str9.isidentifier()) + self.assertTrue(str10.isidentifier()) + self.assertFalse(str11.isidentifier()) + self.assertFalse(str12.isidentifier()) + str15 = _FakeString("") str16 = _FakeString("FooBar") self.assertTrue(str9.islower()) @@ -258,6 +269,14 @@ class 
TestStringMixIn(unittest.TestCase): self.assertTrue(str13.isnumeric()) self.assertTrue(str14.isnumeric()) + if py3k: + str16B = _FakeString("\x01\x02") + self.assertTrue(str9.isprintable()) + self.assertTrue(str13.isprintable()) + self.assertTrue(str14.isprintable()) + self.assertTrue(str15.isprintable()) + self.assertFalse(str16B.isprintable()) + str17 = _FakeString(" ") str18 = _FakeString("\t \t \r\n") self.assertFalse(str1.isspace()) @@ -283,20 +302,26 @@ class TestStringMixIn(unittest.TestCase): self.assertEquals("fake string ", str1.ljust(16)) self.assertEquals("fake stringqqqq", str1.ljust(15, "q")) + str22 = _FakeString("ß") self.assertEquals("", str15.lower()) self.assertEquals("foobar", str16.lower()) + self.assertEquals("ß", str22.lower()) + if py3k: + self.assertEquals("", str15.casefold()) + self.assertEquals("foobar", str16.casefold()) + self.assertEquals("ss", str22.casefold()) - str22 = _FakeString(" fake string ") + str23 = _FakeString(" fake string ") self.assertEquals("fake string", str1.lstrip()) - self.assertEquals("fake string ", str22.lstrip()) + self.assertEquals("fake string ", str23.lstrip()) self.assertEquals("ke string", str1.lstrip("abcdef")) self.assertEquals(("fa", "ke", " string"), str1.partition("ke")) self.assertEquals(("fake string", "", ""), str1.partition("asdf")) - str23 = _FakeString("boo foo moo") + str24 = _FakeString("boo foo moo") self.assertEquals("real string", str1.replace("fake", "real")) - self.assertEquals("bu fu moo", str23.replace("oo", "u", 2)) + self.assertEquals("bu fu moo", str24.replace("oo", "u", 2)) self.assertEquals(3, str1.rfind("e")) self.assertEquals(-1, str1.rfind("z")) @@ -319,44 +344,44 @@ class TestStringMixIn(unittest.TestCase): self.assertEquals(("fa", "ke", " string"), str1.rpartition("ke")) self.assertEquals(("", "", "fake string"), str1.rpartition("asdf")) - str24 = _FakeString(" this is a sentence with whitespace ") + str25 = _FakeString(" this is a sentence with whitespace ") actual = ["this", 
"is", "a", "sentence", "with", "whitespace"] - self.assertEquals(actual, str24.rsplit()) - self.assertEquals(actual, str24.rsplit(None)) + self.assertEquals(actual, str25.rsplit()) + self.assertEquals(actual, str25.rsplit(None)) actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with", "", "whitespace", ""] - self.assertEquals(actual, str24.rsplit(" ")) + self.assertEquals(actual, str25.rsplit(" ")) actual = [" this is a", "sentence", "with", "whitespace"] - self.assertEquals(actual, str24.rsplit(None, 3)) + self.assertEquals(actual, str25.rsplit(None, 3)) actual = [" this is a sentence with", "", "whitespace", ""] - self.assertEquals(actual, str24.rsplit(" ", 3)) + self.assertEquals(actual, str25.rsplit(" ", 3)) self.assertEquals("fake string", str1.rstrip()) - self.assertEquals(" fake string", str22.rstrip()) + self.assertEquals(" fake string", str23.rstrip()) self.assertEquals("fake stri", str1.rstrip("ngr")) actual = ["this", "is", "a", "sentence", "with", "whitespace"] - self.assertEquals(actual, str24.split()) - self.assertEquals(actual, str24.split(None)) + self.assertEquals(actual, str25.split()) + self.assertEquals(actual, str25.split(None)) actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with", "", "whitespace", ""] - self.assertEquals(actual, str24.split(" ")) + self.assertEquals(actual, str25.split(" ")) actual = ["this", "is", "a", "sentence with whitespace "] - self.assertEquals(actual, str24.split(None, 3)) + self.assertEquals(actual, str25.split(None, 3)) actual = ["", "", "", "this is a sentence with whitespace "] - self.assertEquals(actual, str24.split(" ", 3)) + self.assertEquals(actual, str25.split(" ", 3)) - str25 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere") + str26 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere") self.assertEquals(["lines", "of", "text", "are", "presented", "here"], - str25.splitlines()) + str26.splitlines()) self.assertEquals(["lines\n", "of\n", "text\r\n", "are\r\n", - 
"presented\n", "here"], str25.splitlines(True)) + "presented\n", "here"], str26.splitlines(True)) self.assertTrue(str1.startswith("fake")) self.assertFalse(str1.startswith("faker")) self.assertEquals("fake string", str1.strip()) - self.assertEquals("fake string", str22.strip()) + self.assertEquals("fake string", str23.strip()) self.assertEquals("ke stri", str1.strip("abcdefngr")) self.assertEquals("fOObAR", str16.swapcase()) From 88201ecb5425689fae53343899b3ee1cc89d77c4 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 18 Mar 2013 03:21:36 -0400 Subject: [PATCH 034/115] Adding TestTokens. Add from __future__ import unicode_literals to a few files. --- tests/test_builder.py | 1 + tests/test_ctokenizer.py | 1 + tests/test_parser.py | 1 + tests/test_pytokenizer.py | 1 + tests/test_tokens.py | 78 ++++++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 81 insertions(+), 1 deletion(-) diff --git a/tests/test_builder.py b/tests/test_builder.py index e38e683..a3518fd 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -20,6 +20,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +from __future__ import unicode_literals import unittest class TestBuilder(unittest.TestCase): diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index 86f4787..07b5290 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -20,6 +20,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +from __future__ import unicode_literals import unittest from _test_tokenizer import TokenizerTestCase diff --git a/tests/test_parser.py b/tests/test_parser.py index 3f9b2e6..5ea2b49 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -20,6 +20,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
+from __future__ import unicode_literals import unittest class TestParser(unittest.TestCase): diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index 4254748..a2f2482 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -20,6 +20,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +from __future__ import unicode_literals import unittest from _test_tokenizer import TokenizerTestCase diff --git a/tests/test_tokens.py b/tests/test_tokens.py index 0e7f87b..5a18b8e 100644 --- a/tests/test_tokens.py +++ b/tests/test_tokens.py @@ -20,10 +20,86 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +from __future__ import unicode_literals import unittest +from mwparserfromhell.compat import py3k +from mwparserfromhell.parser import tokens + class TestTokens(unittest.TestCase): - pass + """Test cases for the Token class and its subclasses.""" + + def test_issubclass(self): + """check that all classes within the tokens module are really Tokens""" + for name in tokens.__all__: + klass = getattr(tokens, name) + self.assertTrue(issubclass(klass, tokens.Token)) + self.assertIsInstance(klass(), klass) + self.assertIsInstance(klass(), tokens.Token) + + def test_attributes(self): + """check that Token attributes can be managed properly""" + token1 = tokens.Token() + token2 = tokens.Token(foo="bar", baz=123) + + self.assertEquals("bar", token2.foo) + self.assertEquals(123, token2.baz) + self.assertRaises(KeyError, lambda: token1.foo) + self.assertRaises(KeyError, lambda: token2.bar) + + token1.spam = "eggs" + token2.foo = "ham" + del token2.baz + + self.assertEquals("eggs", token1.spam) + self.assertEquals("ham", token2.foo) + self.assertRaises(KeyError, lambda: token2.baz) + self.assertRaises(KeyError, delattr, token2, "baz") + + def test_repr(self): + """check that repr() on a Token works as expected""" + token1 = tokens.Token() + token2 = 
tokens.Token(foo="bar", baz=123) + token3 = tokens.Text(text="earwig" * 100) + hundredchars = ("earwig" * 100)[:97] + "..." + + self.assertEquals("Token()", repr(token1)) + if py3k: + token2repr = "Token(foo='bar', baz=123)" + token3repr = "Text(text='" + hundredchars + "')" + else: + token2repr = "Token(foo=u'bar', baz=123)" + token3repr = "Text(text=u'" + hundredchars + "')" + self.assertEquals(token2repr, repr(token2)) + self.assertEquals(token3repr, repr(token3)) + + def test_equality(self): + """check that equivalent tokens are considered equal""" + token1 = tokens.Token() + token2 = tokens.Token() + token3 = tokens.Token(foo="bar", baz=123) + token4 = tokens.Text(text="asdf") + token5 = tokens.Text(text="asdf") + token6 = tokens.TemplateOpen(text="asdf") + + self.assertEquals(token1, token2) + self.assertEquals(token2, token1) + self.assertEquals(token4, token5) + self.assertEquals(token5, token4) + self.assertNotEquals(token1, token3) + self.assertNotEquals(token2, token3) + self.assertNotEquals(token4, token6) + self.assertNotEquals(token5, token6) + + def test_repr_equality(self): + "check that eval(repr(token)) == token" + tests = [ + tokens.Token(), + tokens.Token(foo="bar", baz=123), + tokens.Text(text="earwig") + ] + for token in tests: + self.assertEquals(token, eval(repr(token), vars(tokens))) if __name__ == "__main__": unittest.main(verbosity=2) From b9d2a83b8a7d187be92772af7510a15fdbd414cd Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Tue, 19 Mar 2013 10:50:41 -0400 Subject: [PATCH 035/115] Starting TestSmartList. 
--- mwparserfromhell/smart_list.py | 3 +++ tests/test_smart_list.py | 42 ++++++++++++++++++++++++++++++++++++++++++ tests/test_string_mixin.py | 2 +- 3 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 tests/test_smart_list.py diff --git a/mwparserfromhell/smart_list.py b/mwparserfromhell/smart_list.py index 625307f..67d96be 100644 --- a/mwparserfromhell/smart_list.py +++ b/mwparserfromhell/smart_list.py @@ -361,3 +361,6 @@ class _ListProxy(list): else: item.sort() self._parent[self._start:self._stop:self._step] = item + + +del inheritdoc \ No newline at end of file diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py new file mode 100644 index 0000000..e22ad27 --- /dev/null +++ b/tests/test_smart_list.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.smart_list import SmartList, _ListProxy + +class TestSmartList(unittest.TestCase): + """Test cases for the SmartList class and its child, _ListProxy.""" + def test_docs(self): + """make sure the methods of SmartList/_ListProxy have docstrings""" + methods = ["append", "count", "extend", "index", "insert", "pop", + "remove", "reverse", "sort"] + for meth in methods: + expected = getattr(list, meth).__doc__ + smartlist_doc = getattr(SmartList, meth).__doc__ + listproxy_doc = getattr(_ListProxy, meth).__doc__ + self.assertEquals(expected, smartlist_doc) + self.assertEquals(expected, listproxy_doc) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index bf49629..28b30dd 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -38,7 +38,7 @@ class _FakeString(StringMixIn): class TestStringMixIn(unittest.TestCase): """Test cases for the StringMixIn class.""" def test_docs(self): - """make sure the various functions of StringMixIn have docstrings""" + """make sure the various methods of StringMixIn have docstrings""" methods = [ "capitalize", "center", "count", "encode", "endswith", "expandtabs", "find", "format", "index", "isalnum", "isalpha", From fe3328aa386c9212d19cebeb3a0c5e626c53b7fc Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 22 Mar 2013 08:38:29 -0400 Subject: [PATCH 036/115] test_doctest() --- mwparserfromhell/smart_list.py | 2 +- tests/test_smart_list.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/mwparserfromhell/smart_list.py b/mwparserfromhell/smart_list.py index 67d96be..7c29c60 100644 --- a/mwparserfromhell/smart_list.py +++ b/mwparserfromhell/smart_list.py @@ -363,4 +363,4 @@ class _ListProxy(list): self._parent[self._start:self._stop:self._step] = item -del inheritdoc \ No newline at end of file +del inheritdoc diff --git 
a/tests/test_smart_list.py b/tests/test_smart_list.py index e22ad27..b83f4d3 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -38,5 +38,15 @@ class TestSmartList(unittest.TestCase): self.assertEquals(expected, smartlist_doc) self.assertEquals(expected, listproxy_doc) + def test_doctest(self): + """make sure a test embedded in SmartList's docstring passes""" + parent = SmartList([0, 1, 2, 3]) + self.assertEquals([0, 1, 2, 3], parent) + child = parent[2:] + self.assertEquals([2, 3], child) + child.append(4) + self.assertEquals([2, 3, 4], child) + self.assertEquals([0, 1, 2, 3, 4], parent) + if __name__ == "__main__": unittest.main(verbosity=2) From a3a35b1e73e7f4cfa84c449d7dfcc191105154f0 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 23 Mar 2013 11:29:20 -0400 Subject: [PATCH 037/115] Only compile Tokenizer on Python 2 for now. --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 445473e..8b4ae86 100644 --- a/setup.py +++ b/setup.py @@ -24,6 +24,7 @@ from setuptools import setup, find_packages, Extension from mwparserfromhell import __version__ +from mwparserfromhell.compat import py3k with open("README.rst") as fp: long_docs = fp.read() @@ -37,7 +38,7 @@ tokenizer = Extension("mwparserfromhell.parser._tokenizer", setup( name = "mwparserfromhell", packages = find_packages(exclude=("tests",)), - ext_modules = [tokenizer], + ext_modules = [] if py3k else [tokenizer], test_suite = "tests", version = __version__, author = "Ben Kurtovic", From ff51d7f5e59577fb99d03d7848b7091be1b82d80 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 23 Mar 2013 15:12:36 -0400 Subject: [PATCH 038/115] Some tests for SmartLists; __reversed__ in StringMixIn --- mwparserfromhell/string_mixin.py | 3 + tests/test_smart_list.py | 164 ++++++++++++++++++++++++++++++++++++++- tests/test_string_mixin.py | 3 + 3 files changed, 169 insertions(+), 1 deletion(-) diff --git a/mwparserfromhell/string_mixin.py 
b/mwparserfromhell/string_mixin.py index 9e6d551..7d269f5 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -113,6 +113,9 @@ class StringMixIn(object): def __getitem__(self, key): return self.__unicode__()[key] + def __reversed__(self): + return reversed(self.__unicode__()) + def __contains__(self, item): if isinstance(item, StringMixIn): return str(item) in self.__unicode__() diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index b83f4d3..5fc26b3 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -23,6 +23,7 @@ from __future__ import unicode_literals import unittest +from mwparserfromhell.compat import py3k from mwparserfromhell.smart_list import SmartList, _ListProxy class TestSmartList(unittest.TestCase): @@ -39,7 +40,7 @@ class TestSmartList(unittest.TestCase): self.assertEquals(expected, listproxy_doc) def test_doctest(self): - """make sure a test embedded in SmartList's docstring passes""" + """make sure the test embedded in SmartList's docstring passes""" parent = SmartList([0, 1, 2, 3]) self.assertEquals([0, 1, 2, 3], parent) child = parent[2:] @@ -48,5 +49,166 @@ class TestSmartList(unittest.TestCase): self.assertEquals([2, 3, 4], child) self.assertEquals([0, 1, 2, 3, 4], parent) + def test_parent_magics(self): + """make sure magically implemented SmartList features work""" + # __getitem__ + # __setitem__ + # __delitem__ + # if not py3k: + # __getslice__ + # __setslice__ + # __delslice__ + # __add__ + # __radd__ + # __iadd__ + + def test_parent_unaffected_magics(self): + """sanity checks against SmartList features that were not modified""" + list1 = SmartList([0, 1, 2, 3, "one", "two"]) + list2 = SmartList([]) + list3 = SmartList([0, 2, 3, 4]) + list4 = SmartList([0, 1, 2]) + + if py3k: + self.assertEquals("[0, 1, 2, 3, 'one', 'two']", str(list1)) + self.assertEquals(b"[0, 1, 2, 3, 'one', 'two']", bytes(list1)) + self.assertEquals("[0, 1, 2, 3, 'one', 'two']", repr(list1)) + else: + 
self.assertEquals("[0, 1, 2, 3, u'one', u'two']", unicode(list1)) + self.assertEquals(b"[0, 1, 2, 3, u'one', u'two']", str(list1)) + self.assertEquals(b"[0, 1, 2, 3, u'one', u'two']", repr(list1)) + + self.assertTrue(list1 < list3) + self.assertTrue(list1 <= list3) + self.assertFalse(list1 == list3) + self.assertTrue(list1 != list3) + self.assertFalse(list1 > list3) + self.assertFalse(list1 >= list3) + + other1 = [0, 2, 3, 4] + self.assertTrue(list1 < other1) + self.assertTrue(list1 <= other1) + self.assertFalse(list1 == other1) + self.assertTrue(list1 != other1) + self.assertFalse(list1 > other1) + self.assertFalse(list1 >= other1) + + other2 = [0, 0, 1, 2] + self.assertFalse(list1 < other2) + self.assertFalse(list1 <= other2) + self.assertFalse(list1 == other2) + self.assertTrue(list1 != other2) + self.assertTrue(list1 > other2) + self.assertTrue(list1 >= other2) + + other3 = [0, 1, 2, 3, "one", "two"] + self.assertFalse(list1 < other3) + self.assertTrue(list1 <= other3) + self.assertTrue(list1 == other3) + self.assertFalse(list1 != other3) + self.assertFalse(list1 > other3) + self.assertTrue(list1 >= other3) + + self.assertTrue(bool(list1)) + self.assertFalse(bool(list2)) + + self.assertEquals(6, len(list1)) + self.assertEquals(0, len(list2)) + + out = [] + for obj in list1: + out.append(obj) + self.assertEquals([0, 1, 2, 3, "one", "two"], out) + + out = [] + for ch in list2: + out.append(ch) + self.assertEquals([], out) + + gen1 = iter(list1) + out = [] + for i in range(len(list1)): + out.append(gen1.next()) + self.assertRaises(StopIteration, gen1.next) + self.assertEquals([0, 1, 2, 3, "one", "two"], out) + gen2 = iter(list2) + self.assertRaises(StopIteration, gen2.next) + + self.assertEquals(["two", "one", 3, 2, 1, 0], list(reversed(list1))) + self.assertEquals([], list(reversed(list2))) + + self.assertTrue("one" in list1) + self.assertTrue(3 in list1) + self.assertFalse(10 in list1) + self.assertFalse(0 in list2) + + self.assertEquals([], list2 * 5) + 
self.assertEquals([], 5 * list2) + self.assertEquals([0, 1, 2, 0, 1, 2, 0, 1, 2], list4 * 3) + self.assertEquals([0, 1, 2, 0, 1, 2, 0, 1, 2], 3 * list4) + list4 *= 2 + self.assertEquals([0, 1, 2, 0, 1, 2], list4) + + def test_parent_methods(self): + # append + # count + # extend + # index + # insert + # pop + # remove + # reverse + # sort + + def test_child_magics(self): + # if py3k: + # __str__ + # __bytes__ + # else: + # __unicode__ + # __str__ + # __repr__ + # __lt__ + # __le__ + # __eq__ + # __ne__ + # __gt__ + # __ge__ + # if py3k: + # __bool__ + # else: + # __nonzero__ + # __len__ + # __getitem__ + # __setitem__ + # __delitem__ + # __iter__ + # __reversed__ + # __contains__ + # if not py3k: + # __getslice__ + # __setslice__ + # __delslice__ + # __add__ + # __radd__ + # __iadd__ + # __mul__ + # __rmul__ + # __imul__ + + def test_child_methods(self): + # append + # count + # extend + # index + # insert + # pop + # remove + # reverse + # sort + + def test_influence(self): + pass + if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 28b30dd..0d95311 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -143,6 +143,9 @@ class TestStringMixIn(unittest.TestCase): self.assertEquals(expected, out) self.assertRaises(StopIteration, gen2.next) + self.assertEquals("gnirts ekaf", "".join(list(reversed(str1)))) + self.assertEquals([], list(reversed(str2))) + self.assertEquals("f", str1[0]) self.assertEquals(" ", str1[4]) self.assertEquals("g", str1[10]) From 65c3950e89bd3d5f60590707fc0aa4269c2b9612 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 24 Mar 2013 12:01:43 -0400 Subject: [PATCH 039/115] Fix bug when an extended slice's stop argument is missing. 
--- mwparserfromhell/smart_list.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mwparserfromhell/smart_list.py b/mwparserfromhell/smart_list.py index 7c29c60..47f7232 100644 --- a/mwparserfromhell/smart_list.py +++ b/mwparserfromhell/smart_list.py @@ -76,7 +76,7 @@ class SmartList(list): def __getitem__(self, key): if not isinstance(key, slice): return super(SmartList, self).__getitem__(key) - sliceinfo = [key.start or 0, key.stop or 0, key.step or 1] + sliceinfo = [key.start or 0, key.stop or maxsize, key.step or 1] child = _ListProxy(self, sliceinfo) self._children[id(child)] = (child, sliceinfo) return child From 10a7e5d2418e9d7afc652b03ef0686434bde8683 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 24 Mar 2013 12:17:52 -0400 Subject: [PATCH 040/115] Fix the same bug in __setitem__ and __delitem__ --- mwparserfromhell/smart_list.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mwparserfromhell/smart_list.py b/mwparserfromhell/smart_list.py index 47f7232..e42dbae 100644 --- a/mwparserfromhell/smart_list.py +++ b/mwparserfromhell/smart_list.py @@ -86,6 +86,7 @@ class SmartList(list): return super(SmartList, self).__setitem__(key, item) item = list(item) super(SmartList, self).__setitem__(key, item) + key = slice(key.start or 0, key.stop or maxsize) diff = len(item) - key.stop + key.start values = self._children.values if py3k else self._children.itervalues if diff: @@ -97,7 +98,9 @@ class SmartList(list): def __delitem__(self, key): super(SmartList, self).__delitem__(key) - if not isinstance(key, slice): + if isinstance(key, slice): + key = slice(key.start or 0, key.stop or maxsize) + else: key = slice(key, key + 1) diff = key.stop - key.start values = self._children.values if py3k else self._children.itervalues From ce6929107edf88065a7dd96082c41ab59732984b Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 24 Mar 2013 12:50:10 -0400 Subject: [PATCH 041/115] Implementing test_parent_get_set_del(). 
--- tests/test_smart_list.py | 87 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 78 insertions(+), 9 deletions(-) diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 5fc26b3..71f428b 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -49,15 +49,79 @@ class TestSmartList(unittest.TestCase): self.assertEquals([2, 3, 4], child) self.assertEquals([0, 1, 2, 3, 4], parent) - def test_parent_magics(self): - """make sure magically implemented SmartList features work""" - # __getitem__ - # __setitem__ - # __delitem__ - # if not py3k: - # __getslice__ - # __setslice__ - # __delslice__ + def test_parent_get_set_del(self): + """make sure SmartList's getitem/setitem/delitem work""" + def assign(L, s1, s2, s3, val): + L[s1:s2:s3] = val + def delete(L, s1): + del L[s1] + + list1 = SmartList([0, 1, 2, 3, "one", "two"]) + list2 = SmartList(list(range(10))) + + self.assertEquals(1, list1[1]) + self.assertEquals("one", list1[-2]) + self.assertEquals([2, 3], list1[2:4]) + self.assertRaises(IndexError, lambda: list1[6]) + self.assertRaises(IndexError, lambda: list1[-7]) + + self.assertEquals([0, 1, 2], list1[:3]) + self.assertEquals([0, 1, 2, 3, "one", "two"], list1[:]) + self.assertEquals([3, "one", "two"], list1[3:]) + self.assertEquals(["one", "two"], list1[-2:]) + self.assertEquals([0, 1], list1[:-4]) + self.assertEquals([], list1[6:]) + self.assertEquals([], list1[4:2]) + + self.assertEquals([0, 2, "one"], list1[0:5:2]) + self.assertEquals([0, 2], list1[0:-3:2]) + self.assertEquals([0, 1, 2, 3, "one", "two"], list1[::]) + self.assertEquals([2, 3, "one", "two"], list1[2::]) + self.assertEquals([0, 1, 2, 3], list1[:4:]) + self.assertEquals([2, 3], list1[2:4:]) + self.assertEquals([0, 2, 4, 6, 8], list2[::2]) + self.assertEquals([2, 5, 8], list2[2::3]) + self.assertEquals([0, 3], list2[:6:3]) + self.assertEquals([2, 5, 8], list2[-8:9:3]) + self.assertEquals([], list2[100000:1000:-100]) + + list1[3] = 100 + 
self.assertEquals(100, list1[3]) + list1[5:] = [6, 7, 8] + self.assertEquals([6, 7, 8], list1[5:]) + self.assertEquals([0, 1, 2, 100, "one", 6, 7, 8], list1) + list1[2:4] = [-1, -2, -3, -4, -5] + self.assertEquals([0, 1, -1, -2, -3, -4, -5, "one", 6, 7, 8], list1) + list1[0:-3] = [99] + self.assertEquals([99, 6, 7, 8], list1) + list2[0:6:2] = [100, 102, 104] + self.assertEquals([100, 1, 102, 3, 104, 5, 6, 7, 8, 9], list2) + list2[::3] = [200, 203, 206, 209] + self.assertEquals([200, 1, 102, 203, 104, 5, 206, 7, 8, 209], list2) + list2[::] = range(7) + self.assertEquals([0, 1, 2, 3, 4, 5, 6], list2) + self.assertRaises(ValueError, + lambda: assign(list2, 0, 5, 2, [100, 102, 104, 106])) + + del list2[2] + self.assertEquals([0, 1, 3, 4, 5, 6], list2) + del list2[-3] + self.assertEquals([0, 1, 3, 5, 6], list2) + self.assertRaises(IndexError, lambda: delete(list2, 100)) + self.assertRaises(IndexError, lambda: delete(list2, -6)) + list2[:] = range(10) + del list2[3:6] + self.assertEquals([0, 1, 2, 6, 7, 8, 9], list2) + del list2[-2:] + self.assertEquals([0, 1, 2, 6, 7], list2) + del list2[:2] + self.assertEquals([2, 6, 7], list2) + list2[:] = range(10) + del list2[2:8:2] + self.assertEquals([0, 1, 3, 5, 7, 8, 9], list2) + + def test_parent_add(self): + """make sure SmartList's add/radd/iadd work""" # __add__ # __radd__ # __iadd__ @@ -150,6 +214,7 @@ class TestSmartList(unittest.TestCase): self.assertEquals([0, 1, 2, 0, 1, 2], list4) def test_parent_methods(self): + pass # append # count # extend @@ -161,6 +226,7 @@ class TestSmartList(unittest.TestCase): # sort def test_child_magics(self): + pass # if py3k: # __str__ # __bytes__ @@ -197,6 +263,7 @@ class TestSmartList(unittest.TestCase): # __imul__ def test_child_methods(self): + pass # append # count # extend @@ -209,6 +276,8 @@ class TestSmartList(unittest.TestCase): def test_influence(self): pass + # test whether changes are propogated correctly + # also test whether children that exit scope are removed from parent's 
map if __name__ == "__main__": unittest.main(verbosity=2) From 67611bfb5bdbc2c445b264d48fea710d99ad56f7 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 24 Mar 2013 13:43:15 -0400 Subject: [PATCH 042/115] Implement test_parent_add(). --- tests/test_smart_list.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 71f428b..1b1c267 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -23,7 +23,7 @@ from __future__ import unicode_literals import unittest -from mwparserfromhell.compat import py3k +from mwparserfromhell.compat import py3k, range from mwparserfromhell.smart_list import SmartList, _ListProxy class TestSmartList(unittest.TestCase): @@ -122,9 +122,15 @@ class TestSmartList(unittest.TestCase): def test_parent_add(self): """make sure SmartList's add/radd/iadd work""" - # __add__ - # __radd__ - # __iadd__ + list1 = SmartList(range(5)) + list2 = SmartList(range(5, 10)) + self.assertEquals([0, 1, 2, 3, 4, 5, 6], list1 + [5, 6]) + self.assertEquals([0, 1, 2, 3, 4], list1) + self.assertEquals(list(range(10)), list1 + list2) + self.assertEquals([-2, -1, 0, 1, 2, 3, 4], [-2, -1] + list1) + self.assertEquals([0, 1, 2, 3, 4], list1) + list1 += ["foo", "bar", "baz"] + self.assertEquals([0, 1, 2, 3, 4, "foo", "bar", "baz"], list1) def test_parent_unaffected_magics(self): """sanity checks against SmartList features that were not modified""" From fb92349909f302833ebcfe905578d1d6e75fd891 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 24 Mar 2013 14:09:47 -0400 Subject: [PATCH 043/115] Fix parsing of arguments in SmartList.sort() --- mwparserfromhell/smart_list.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/mwparserfromhell/smart_list.py b/mwparserfromhell/smart_list.py index e42dbae..b8d02d5 100644 --- a/mwparserfromhell/smart_list.py +++ b/mwparserfromhell/smart_list.py @@ -168,16 +168,14 @@ class SmartList(list): 
copy = list(self) for child in self._children: child._parent = copy + kwargs = {} if cmp is not None: - if key is not None: - if reverse is not None: - super(SmartList, self).sort(cmp, key, reverse) - else: - super(SmartList, self).sort(cmp, key) - else: - super(SmartList, self).sort(cmp) - else: - super(SmartList, self).sort() + kwargs["cmp"] = cmp + if key is not None: + kwargs["key"] = key + if reverse is not None: + kwargs["reverse"] = reverse + super(SmartList, self).sort(**kwargs) class _ListProxy(list): From 986e3ed855971593d2ea6f68962fed4d1ca8d2ca Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 24 Mar 2013 14:41:31 -0400 Subject: [PATCH 044/115] Implement test_parent_methods(). --- tests/test_smart_list.py | 79 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 65 insertions(+), 14 deletions(-) diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 1b1c267..2fdfeff 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -100,15 +100,15 @@ class TestSmartList(unittest.TestCase): self.assertEquals([200, 1, 102, 203, 104, 5, 206, 7, 8, 209], list2) list2[::] = range(7) self.assertEquals([0, 1, 2, 3, 4, 5, 6], list2) - self.assertRaises(ValueError, - lambda: assign(list2, 0, 5, 2, [100, 102, 104, 106])) + self.assertRaises(ValueError, assign, list2, 0, 5, 2, + [100, 102, 104, 106]) del list2[2] self.assertEquals([0, 1, 3, 4, 5, 6], list2) del list2[-3] self.assertEquals([0, 1, 3, 5, 6], list2) - self.assertRaises(IndexError, lambda: delete(list2, 100)) - self.assertRaises(IndexError, lambda: delete(list2, -6)) + self.assertRaises(IndexError, delete, list2, 100) + self.assertRaises(IndexError, delete, list2, -6) list2[:] = range(10) del list2[3:6] self.assertEquals([0, 1, 2, 6, 7, 8, 9], list2) @@ -220,16 +220,67 @@ class TestSmartList(unittest.TestCase): self.assertEquals([0, 1, 2, 0, 1, 2], list4) def test_parent_methods(self): - pass - # append - # count - # extend - # index - # insert - # pop - # remove - 
# reverse - # sort + """make sure SmartList's non-magic methods work, like append()""" + list1 = SmartList(range(5)) + list2 = SmartList(["foo"]) + list3 = SmartList([("a", 5), ("d", 2), ("b", 8), ("c", 3)]) + + list1.append(5) + list1.append(1) + list1.append(2) + self.assertEquals([0, 1, 2, 3, 4, 5, 1, 2], list1) + + self.assertEquals(0, list1.count(6)) + self.assertEquals(2, list1.count(1)) + + list1.extend(range(5, 8)) + self.assertEquals([0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) + + self.assertEquals(1, list1.index(1)) + self.assertEquals(6, list1.index(1, 3)) + self.assertEquals(6, list1.index(1, 3, 7)) + self.assertRaises(ValueError, list1.index, 1, 3, 5) + + list1.insert(0, -1) + self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) + list1.insert(-1, 6.5) + self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7], list1) + list1.insert(100, 8) + self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7, 8], list1) + + self.assertEquals(8, list1.pop()) + self.assertEquals(7, list1.pop()) + self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5], list1) + self.assertEquals(-1, list1.pop(0)) + self.assertEquals(5, list1.pop(5)) + self.assertEquals([0, 1, 2, 3, 4, 1, 2, 5, 6, 6.5], list1) + self.assertEquals("foo", list2.pop()) + self.assertRaises(IndexError, list2.pop) + self.assertEquals([], list2) + + list1.remove(6.5) + self.assertEquals([0, 1, 2, 3, 4, 1, 2, 5, 6], list1) + list1.remove(1) + self.assertEquals([0, 2, 3, 4, 1, 2, 5, 6], list1) + list1.remove(1) + self.assertEquals([0, 2, 3, 4, 2, 5, 6], list1) + self.assertRaises(ValueError, list1.remove, 1) + + list1.reverse() + self.assertEquals([6, 5, 2, 4, 3, 2, 0], list1) + + list1.sort() + self.assertEquals([0, 2, 2, 3, 4, 5, 6], list1) + list1.sort(reverse=True) + self.assertEquals([6, 5, 4, 3, 2, 2, 0], list1) + list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - y)) # Distance from 3 + self.assertEquals([3, 4, 2, 2, 5, 6, 0], list1) + list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - 
y), reverse=True) + self.assertEquals([6, 0, 5, 4, 2, 2, 3], list1) + list3.sort(key=lambda i: i[1]) + self.assertEquals([("d", 2), ("c", 3), ("a", 5), ("b", 8)], list3) + list3.sort(key=lambda i: i[1], reverse=True) + self.assertEquals([("b", 8), ("a", 5), ("c", 3), ("d", 2)], list3) def test_child_magics(self): pass From d85ff73c19e026fa209c252b4d96699bbeb75121 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 24 Mar 2013 15:46:30 -0400 Subject: [PATCH 045/115] Squashing some sneaky bugs in SmartLists's children. --- mwparserfromhell/smart_list.py | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/mwparserfromhell/smart_list.py b/mwparserfromhell/smart_list.py index b8d02d5..229500c 100644 --- a/mwparserfromhell/smart_list.py +++ b/mwparserfromhell/smart_list.py @@ -76,7 +76,8 @@ class SmartList(list): def __getitem__(self, key): if not isinstance(key, slice): return super(SmartList, self).__getitem__(key) - sliceinfo = [key.start or 0, key.stop or maxsize, key.step or 1] + sliceinfo = [key.start or 0, maxsize if key.stop is None else key.stop, + key.step or 1] child = _ListProxy(self, sliceinfo) self._children[id(child)] = (child, sliceinfo) return child @@ -86,12 +87,12 @@ class SmartList(list): return super(SmartList, self).__setitem__(key, item) item = list(item) super(SmartList, self).__setitem__(key, item) - key = slice(key.start or 0, key.stop or maxsize) + key = slice(key.start or 0, maxsize if key.stop is None else key.stop) diff = len(item) - key.stop + key.start values = self._children.values if py3k else self._children.itervalues if diff: for child, (start, stop, step) in values(): - if start >= key.stop: + if start > key.stop: self._children[id(child)][1][0] += diff if stop >= key.stop and stop != maxsize: self._children[id(child)][1][1] += diff @@ -99,7 +100,8 @@ class SmartList(list): def __delitem__(self, key): super(SmartList, self).__delitem__(key) if isinstance(key, slice): - key = 
slice(key.start or 0, key.stop or maxsize) + key = slice(key.start or 0, + maxsize if key.stop is None else key.stop) else: key = slice(key, key + 1) diff = key.stop - key.start @@ -107,7 +109,7 @@ class SmartList(list): for child, (start, stop, step) in values(): if start > key.start: self._children[id(child)][1][0] -= diff - if stop >= key.stop: + if stop >= key.stop and stop != maxsize: self._children[id(child)][1][1] -= diff if not py3k: @@ -296,6 +298,8 @@ class _ListProxy(list): @property def _stop(self): """The ending index of this list, exclusive.""" + if self._sliceinfo[1] == maxsize: + return len(self._parent) return self._sliceinfo[1] @property @@ -329,18 +333,25 @@ class _ListProxy(list): @inheritdoc def insert(self, index, item): + if index < 0: + index = len(self) + index self._parent.insert(self._start + index, item) @inheritdoc def pop(self, index=None): + length = len(self) if index is None: - index = len(self) - 1 + index = length - 1 + elif index < 0: + index = length + index + if index < 0 or index >= length: + raise IndexError("pop index out of range") return self._parent.pop(self._start + index) @inheritdoc def remove(self, item): index = self.index(item) - del self._parent[index] + del self._parent[self._start + index] @inheritdoc def reverse(self): @@ -351,16 +362,14 @@ class _ListProxy(list): @inheritdoc def sort(self, cmp=None, key=None, reverse=None): item = self._render() + kwargs = {} if cmp is not None: - if key is not None: - if reverse is not None: - item.sort(cmp, key, reverse) - else: - item.sort(cmp, key) - else: - item.sort(cmp) - else: - item.sort() + kwargs["cmp"] = cmp + if key is not None: + kwargs["key"] = key + if reverse is not None: + kwargs["reverse"] = reverse + item.sort(**kwargs) self._parent[self._start:self._stop:self._step] = item From b8e926a2569c7ec15001d19e767dd475a4f249e9 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 24 Mar 2013 15:47:21 -0400 Subject: [PATCH 046/115] Abstract out public list method 
tests; implement test_child_methods() --- tests/test_smart_list.py | 145 ++++++++++++++++++++++++----------------------- 1 file changed, 74 insertions(+), 71 deletions(-) diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 2fdfeff..44775b4 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -28,6 +28,71 @@ from mwparserfromhell.smart_list import SmartList, _ListProxy class TestSmartList(unittest.TestCase): """Test cases for the SmartList class and its child, _ListProxy.""" + + def _test_list_methods(self, builder): + """Run tests on the public methods of a list built with *builder*.""" + list1 = builder(range(5)) + list2 = builder(["foo"]) + list3 = builder([("a", 5), ("d", 2), ("b", 8), ("c", 3)]) + + list1.append(5) + list1.append(1) + list1.append(2) + self.assertEquals([0, 1, 2, 3, 4, 5, 1, 2], list1) + + self.assertEquals(0, list1.count(6)) + self.assertEquals(2, list1.count(1)) + + list1.extend(range(5, 8)) + self.assertEquals([0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) + + self.assertEquals(1, list1.index(1)) + self.assertEquals(6, list1.index(1, 3)) + self.assertEquals(6, list1.index(1, 3, 7)) + self.assertRaises(ValueError, list1.index, 1, 3, 5) + + list1.insert(0, -1) + self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) + list1.insert(-1, 6.5) + self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7], list1) + list1.insert(13, 8) + self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7, 8], list1) + + self.assertEquals(8, list1.pop()) + self.assertEquals(7, list1.pop()) + self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5], list1) + self.assertEquals(-1, list1.pop(0)) + self.assertEquals(5, list1.pop(5)) + self.assertEquals(6.5, list1.pop(-1)) + self.assertEquals([0, 1, 2, 3, 4, 1, 2, 5, 6], list1) + self.assertEquals("foo", list2.pop()) + self.assertRaises(IndexError, list2.pop) + self.assertEquals([], list2) + + list1.remove(6) + self.assertEquals([0, 1, 2, 3, 4, 1, 2, 5], list1) + 
list1.remove(1) + self.assertEquals([0, 2, 3, 4, 1, 2, 5], list1) + list1.remove(1) + self.assertEquals([0, 2, 3, 4, 2, 5], list1) + self.assertRaises(ValueError, list1.remove, 1) + + list1.reverse() + self.assertEquals([5, 2, 4, 3, 2, 0], list1) + + list1.sort() + self.assertEquals([0, 2, 2, 3, 4, 5], list1) + list1.sort(reverse=True) + self.assertEquals([5, 4, 3, 2, 2, 0], list1) + list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - y)) # Distance from 3 + self.assertEquals([3, 4, 2, 2, 5, 0], list1) + list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - y), reverse=True) + self.assertEquals([0, 5, 4, 2, 2, 3], list1) + list3.sort(key=lambda i: i[1]) + self.assertEquals([("d", 2), ("c", 3), ("a", 5), ("b", 8)], list3) + list3.sort(key=lambda i: i[1], reverse=True) + self.assertEquals([("b", 8), ("a", 5), ("c", 3), ("d", 2)], list3) + def test_docs(self): """make sure the methods of SmartList/_ListProxy have docstrings""" methods = ["append", "count", "extend", "index", "insert", "pop", @@ -221,68 +286,10 @@ class TestSmartList(unittest.TestCase): def test_parent_methods(self): """make sure SmartList's non-magic methods work, like append()""" - list1 = SmartList(range(5)) - list2 = SmartList(["foo"]) - list3 = SmartList([("a", 5), ("d", 2), ("b", 8), ("c", 3)]) - - list1.append(5) - list1.append(1) - list1.append(2) - self.assertEquals([0, 1, 2, 3, 4, 5, 1, 2], list1) - - self.assertEquals(0, list1.count(6)) - self.assertEquals(2, list1.count(1)) - - list1.extend(range(5, 8)) - self.assertEquals([0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) - - self.assertEquals(1, list1.index(1)) - self.assertEquals(6, list1.index(1, 3)) - self.assertEquals(6, list1.index(1, 3, 7)) - self.assertRaises(ValueError, list1.index, 1, 3, 5) - - list1.insert(0, -1) - self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) - list1.insert(-1, 6.5) - self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7], list1) - list1.insert(100, 8) - self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 
6, 6.5, 7, 8], list1) - - self.assertEquals(8, list1.pop()) - self.assertEquals(7, list1.pop()) - self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5], list1) - self.assertEquals(-1, list1.pop(0)) - self.assertEquals(5, list1.pop(5)) - self.assertEquals([0, 1, 2, 3, 4, 1, 2, 5, 6, 6.5], list1) - self.assertEquals("foo", list2.pop()) - self.assertRaises(IndexError, list2.pop) - self.assertEquals([], list2) - - list1.remove(6.5) - self.assertEquals([0, 1, 2, 3, 4, 1, 2, 5, 6], list1) - list1.remove(1) - self.assertEquals([0, 2, 3, 4, 1, 2, 5, 6], list1) - list1.remove(1) - self.assertEquals([0, 2, 3, 4, 2, 5, 6], list1) - self.assertRaises(ValueError, list1.remove, 1) - - list1.reverse() - self.assertEquals([6, 5, 2, 4, 3, 2, 0], list1) - - list1.sort() - self.assertEquals([0, 2, 2, 3, 4, 5, 6], list1) - list1.sort(reverse=True) - self.assertEquals([6, 5, 4, 3, 2, 2, 0], list1) - list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - y)) # Distance from 3 - self.assertEquals([3, 4, 2, 2, 5, 6, 0], list1) - list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - y), reverse=True) - self.assertEquals([6, 0, 5, 4, 2, 2, 3], list1) - list3.sort(key=lambda i: i[1]) - self.assertEquals([("d", 2), ("c", 3), ("a", 5), ("b", 8)], list3) - list3.sort(key=lambda i: i[1], reverse=True) - self.assertEquals([("b", 8), ("a", 5), ("c", 3), ("d", 2)], list3) + self._test_list_methods(lambda L: SmartList(L)) def test_child_magics(self): + """make sure _ListProxy's magically implemented features work""" pass # if py3k: # __str__ @@ -320,20 +327,16 @@ class TestSmartList(unittest.TestCase): # __imul__ def test_child_methods(self): - pass - # append - # count - # extend - # index - # insert - # pop - # remove - # reverse - # sort + """make sure _ListProxy's non-magic methods work, like append()""" + self._test_list_methods(lambda L: SmartList(list(L))[:]) + self._test_list_methods(lambda L: SmartList([999] + list(L))[1:]) + self._test_list_methods(lambda L: SmartList(list(L) + [999])[:-1]) + 
builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] + self._test_list_methods(builder) def test_influence(self): + """make sure changes are propagated from parents to children""" pass - # test whether changes are propogated correctly # also test whether children that exit scope are removed from parent's map if __name__ == "__main__": From 49b9863b77e91e1199c8f036910b862b8fddf0fb Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 25 Mar 2013 15:56:38 -0400 Subject: [PATCH 047/115] Handle keyword arguments in some methods with py3k correctly. --- mwparserfromhell/string_mixin.py | 70 ++++++++++++++++++++++++++-------------- tests/test_string_mixin.py | 8 +++++ 2 files changed, 54 insertions(+), 24 deletions(-) diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index 7d269f5..efd28d8 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -143,19 +143,21 @@ class StringMixIn(object): if not py3k: @inheritdoc def decode(self, encoding=None, errors=None): - if errors is None: - if encoding is None: - return self.__unicode__().decode() - return self.__unicode__().decode(encoding) - return self.__unicode__().decode(encoding, errors) + kwargs = {} + if encoding is not None: + kwargs["encoding"] = encoding + if errors is not None: + kwargs["errors"] = errors + return self.__unicode__().decode(**kwargs) @inheritdoc def encode(self, encoding=None, errors=None): - if errors is None: - if encoding is None: - return self.__unicode__().encode() - return self.__unicode__().encode(encoding) - return self.__unicode__().encode(encoding, errors) + kwargs = {} + if encoding is not None: + kwargs["encoding"] = encoding + if errors is not None: + kwargs["errors"] = errors + return self.__unicode__().encode(**kwargs) @inheritdoc def endswith(self, prefix, start=None, end=None): @@ -286,25 +288,45 @@ class StringMixIn(object): def rpartition(self, sep): return self.__unicode__().rpartition(sep) - 
@inheritdoc - def rsplit(self, sep=None, maxsplit=None): - if maxsplit is None: - if sep is None: - return self.__unicode__().rsplit() - return self.__unicode__().rsplit(sep) - return self.__unicode__().rsplit(sep, maxsplit) + if py3k: + @inheritdoc + def rsplit(self, sep=None, maxsplit=None): + kwargs = {} + if sep is not None: + kwargs["sep"] = sep + if maxsplit is not None: + kwargs["maxsplit"] = maxsplit + return self.__unicode__().rsplit(**kwargs) + else: + @inheritdoc + def rsplit(self, sep=None, maxsplit=None): + if maxsplit is None: + if sep is None: + return self.__unicode__().rsplit() + return self.__unicode__().rsplit(sep) + return self.__unicode__().rsplit(sep, maxsplit) @inheritdoc def rstrip(self, chars=None): return self.__unicode__().rstrip(chars) - @inheritdoc - def split(self, sep=None, maxsplit=None): - if maxsplit is None: - if sep is None: - return self.__unicode__().split() - return self.__unicode__().split(sep) - return self.__unicode__().split(sep, maxsplit) + if py3k: + @inheritdoc + def split(self, sep=None, maxsplit=None): + kwargs = {} + if sep is not None: + kwargs["sep"] = sep + if maxsplit is not None: + kwargs["maxsplit"] = maxsplit + return self.__unicode__().split(**kwargs) + else: + @inheritdoc + def split(self, sep=None, maxsplit=None): + if maxsplit is None: + if sep is None: + return self.__unicode__().split() + return self.__unicode__().split(sep) + return self.__unicode__().split(sep, maxsplit) @inheritdoc def splitlines(self, keepends=None): diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 0d95311..8d86c8e 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -189,10 +189,14 @@ class TestStringMixIn(unittest.TestCase): self.assertEquals(b"fake string", str1.encode()) self.assertEquals(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", str3.encode("utf8")) + self.assertEquals(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", + str3.encode(encoding="utf8")) 
self.assertRaises(UnicodeEncodeError, str3.encode) self.assertRaises(UnicodeEncodeError, str3.encode, "ascii") self.assertRaises(UnicodeEncodeError, str3.encode, "ascii", "strict") + self.assertRaises(UnicodeEncodeError, str3.encode, errors="strict") self.assertEquals("", str3.encode("ascii", "ignore")) + self.assertEquals("", str3.encode(errors="ignore")) self.assertTrue(str1.endswith("ing")) self.assertFalse(str1.endswith("ingh")) @@ -358,6 +362,8 @@ class TestStringMixIn(unittest.TestCase): self.assertEquals(actual, str25.rsplit(None, 3)) actual = [" this is a sentence with", "", "whitespace", ""] self.assertEquals(actual, str25.rsplit(" ", 3)) + if py3k: + self.assertEquals(actual, str25.rsplit(maxsplit=3)) self.assertEquals("fake string", str1.rstrip()) self.assertEquals(" fake string", str23.rstrip()) @@ -373,6 +379,8 @@ class TestStringMixIn(unittest.TestCase): self.assertEquals(actual, str25.split(None, 3)) actual = ["", "", "", "this is a sentence with whitespace "] self.assertEquals(actual, str25.split(" ", 3)) + if py3k: + self.assertEquals(actual, str25.split(maxsplit=3)) str26 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere") self.assertEquals(["lines", "of", "text", "are", "presented", "here"], From 740db6ddfa86c3c52776ea57503ef9254f2bbd7a Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 25 Mar 2013 16:42:37 -0400 Subject: [PATCH 048/115] Implement some more tests; squash bugs in SmartList/_ListProxy --- mwparserfromhell/smart_list.py | 44 +++++-- tests/test_smart_list.py | 274 ++++++++++++++++++++--------------------- 2 files changed, 173 insertions(+), 145 deletions(-) diff --git a/mwparserfromhell/smart_list.py b/mwparserfromhell/smart_list.py index 229500c..062e9ad 100644 --- a/mwparserfromhell/smart_list.py +++ b/mwparserfromhell/smart_list.py @@ -76,8 +76,8 @@ class SmartList(list): def __getitem__(self, key): if not isinstance(key, slice): return super(SmartList, self).__getitem__(key) - sliceinfo = [key.start or 0, maxsize if 
key.stop is None else key.stop, - key.step or 1] + keystop = maxsize if key.stop is None else key.stop + sliceinfo = [key.start or 0, keystop, key.step or 1] child = _ListProxy(self, sliceinfo) self._children[id(child)] = (child, sliceinfo) return child @@ -100,8 +100,8 @@ class SmartList(list): def __delitem__(self, key): super(SmartList, self).__delitem__(key) if isinstance(key, slice): - key = slice(key.start or 0, - maxsize if key.stop is None else key.stop) + keystop = maxsize if key.stop is None else key.stop + key = slice(key.start or 0, keystop) else: key = slice(key, key + 1) diff = key.stop - key.start @@ -241,18 +241,36 @@ class _ListProxy(list): def __setitem__(self, key, item): if isinstance(key, slice): - adjusted = slice(key.start + self._start, key.stop + self._stop, - key.step) + keystart = (key.start or 0) + self._start + if key.stop is None or key.stop == maxsize: + keystop = self._stop + else: + keystop = key.stop + self._start + adjusted = slice(keystart, keystop, key.step) self._parent[adjusted] = item else: + length = len(self) + if key < 0: + key = length + key + if key < 0 or key >= length: + raise IndexError("list assignment index out of range") self._parent[self._start + key] = item def __delitem__(self, key): if isinstance(key, slice): - adjusted = slice(key.start + self._start, key.stop + self._stop, - key.step) + keystart = (key.start or 0) + self._start + if key.stop is None or key.stop == maxsize: + keystop = self._stop + else: + keystop = key.stop + self._start + adjusted = slice(keystart, keystop, key.step) del self._parent[adjusted] else: + length = len(self) + if key < 0: + key = length + key + if key < 0 or key >= length: + raise IndexError("list assignment index out of range") del self._parent[self._start + key] def __iter__(self): @@ -290,6 +308,16 @@ class _ListProxy(list): self.extend(other) return self + def __mul__(self, other): + return SmartList(list(self) * other) + + def __rmul__(self, other): + return SmartList(other 
* list(self)) + + def __imul__(self, other): + self.extend(list(self) * (other - 1)) + return self + @property def _start(self): """The starting index of this list, inclusive.""" diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 44775b4..777660a 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -29,100 +29,15 @@ from mwparserfromhell.smart_list import SmartList, _ListProxy class TestSmartList(unittest.TestCase): """Test cases for the SmartList class and its child, _ListProxy.""" - def _test_list_methods(self, builder): - """Run tests on the public methods of a list built with *builder*.""" - list1 = builder(range(5)) - list2 = builder(["foo"]) - list3 = builder([("a", 5), ("d", 2), ("b", 8), ("c", 3)]) - - list1.append(5) - list1.append(1) - list1.append(2) - self.assertEquals([0, 1, 2, 3, 4, 5, 1, 2], list1) - - self.assertEquals(0, list1.count(6)) - self.assertEquals(2, list1.count(1)) - - list1.extend(range(5, 8)) - self.assertEquals([0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) - - self.assertEquals(1, list1.index(1)) - self.assertEquals(6, list1.index(1, 3)) - self.assertEquals(6, list1.index(1, 3, 7)) - self.assertRaises(ValueError, list1.index, 1, 3, 5) - - list1.insert(0, -1) - self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) - list1.insert(-1, 6.5) - self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7], list1) - list1.insert(13, 8) - self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7, 8], list1) - - self.assertEquals(8, list1.pop()) - self.assertEquals(7, list1.pop()) - self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5], list1) - self.assertEquals(-1, list1.pop(0)) - self.assertEquals(5, list1.pop(5)) - self.assertEquals(6.5, list1.pop(-1)) - self.assertEquals([0, 1, 2, 3, 4, 1, 2, 5, 6], list1) - self.assertEquals("foo", list2.pop()) - self.assertRaises(IndexError, list2.pop) - self.assertEquals([], list2) - - list1.remove(6) - self.assertEquals([0, 1, 2, 3, 4, 1, 2, 5], list1) 
- list1.remove(1) - self.assertEquals([0, 2, 3, 4, 1, 2, 5], list1) - list1.remove(1) - self.assertEquals([0, 2, 3, 4, 2, 5], list1) - self.assertRaises(ValueError, list1.remove, 1) - - list1.reverse() - self.assertEquals([5, 2, 4, 3, 2, 0], list1) - - list1.sort() - self.assertEquals([0, 2, 2, 3, 4, 5], list1) - list1.sort(reverse=True) - self.assertEquals([5, 4, 3, 2, 2, 0], list1) - list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - y)) # Distance from 3 - self.assertEquals([3, 4, 2, 2, 5, 0], list1) - list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - y), reverse=True) - self.assertEquals([0, 5, 4, 2, 2, 3], list1) - list3.sort(key=lambda i: i[1]) - self.assertEquals([("d", 2), ("c", 3), ("a", 5), ("b", 8)], list3) - list3.sort(key=lambda i: i[1], reverse=True) - self.assertEquals([("b", 8), ("a", 5), ("c", 3), ("d", 2)], list3) - - def test_docs(self): - """make sure the methods of SmartList/_ListProxy have docstrings""" - methods = ["append", "count", "extend", "index", "insert", "pop", - "remove", "reverse", "sort"] - for meth in methods: - expected = getattr(list, meth).__doc__ - smartlist_doc = getattr(SmartList, meth).__doc__ - listproxy_doc = getattr(_ListProxy, meth).__doc__ - self.assertEquals(expected, smartlist_doc) - self.assertEquals(expected, listproxy_doc) - - def test_doctest(self): - """make sure the test embedded in SmartList's docstring passes""" - parent = SmartList([0, 1, 2, 3]) - self.assertEquals([0, 1, 2, 3], parent) - child = parent[2:] - self.assertEquals([2, 3], child) - child.append(4) - self.assertEquals([2, 3, 4], child) - self.assertEquals([0, 1, 2, 3, 4], parent) - - def test_parent_get_set_del(self): - """make sure SmartList's getitem/setitem/delitem work""" + def _test_get_set_del_item(self, builder): + """Run tests on __get/set/delitem__ of a list built with *builder*.""" def assign(L, s1, s2, s3, val): L[s1:s2:s3] = val def delete(L, s1): del L[s1] - list1 = SmartList([0, 1, 2, 3, "one", "two"]) - list2 = 
SmartList(list(range(10))) + list1 = builder([0, 1, 2, 3, "one", "two"]) + list2 = builder(list(range(10))) self.assertEquals(1, list1[1]) self.assertEquals("one", list1[-2]) @@ -152,9 +67,11 @@ class TestSmartList(unittest.TestCase): list1[3] = 100 self.assertEquals(100, list1[3]) + list1[-3] = 101 + self.assertEquals([0, 1, 2, 101, "one", "two"], list1) list1[5:] = [6, 7, 8] self.assertEquals([6, 7, 8], list1[5:]) - self.assertEquals([0, 1, 2, 100, "one", 6, 7, 8], list1) + self.assertEquals([0, 1, 2, 101, "one", 6, 7, 8], list1) list1[2:4] = [-1, -2, -3, -4, -5] self.assertEquals([0, 1, -1, -2, -3, -4, -5, "one", 6, 7, 8], list1) list1[0:-3] = [99] @@ -185,10 +102,10 @@ class TestSmartList(unittest.TestCase): del list2[2:8:2] self.assertEquals([0, 1, 3, 5, 7, 8, 9], list2) - def test_parent_add(self): - """make sure SmartList's add/radd/iadd work""" - list1 = SmartList(range(5)) - list2 = SmartList(range(5, 10)) + def _test_add_radd_iadd(self, builder): + """Run tests on __r/i/add__ of a list built with *builder*.""" + list1 = builder(range(5)) + list2 = builder(range(5, 10)) self.assertEquals([0, 1, 2, 3, 4, 5, 6], list1 + [5, 6]) self.assertEquals([0, 1, 2, 3, 4], list1) self.assertEquals(list(range(10)), list1 + list2) @@ -197,12 +114,12 @@ class TestSmartList(unittest.TestCase): list1 += ["foo", "bar", "baz"] self.assertEquals([0, 1, 2, 3, 4, "foo", "bar", "baz"], list1) - def test_parent_unaffected_magics(self): - """sanity checks against SmartList features that were not modified""" - list1 = SmartList([0, 1, 2, 3, "one", "two"]) - list2 = SmartList([]) - list3 = SmartList([0, 2, 3, 4]) - list4 = SmartList([0, 1, 2]) + def _test_other_magic_methods(self, builder): + """Run tests on other magic methods of a list built with *builder*.""" + list1 = builder([0, 1, 2, 3, "one", "two"]) + list2 = builder([]) + list3 = builder([0, 2, 3, 4]) + list4 = builder([0, 1, 2]) if py3k: self.assertEquals("[0, 1, 2, 3, 'one', 'two']", str(list1)) @@ -284,47 +201,130 @@ 
class TestSmartList(unittest.TestCase): list4 *= 2 self.assertEquals([0, 1, 2, 0, 1, 2], list4) + def _test_list_methods(self, builder): + """Run tests on the public methods of a list built with *builder*.""" + list1 = builder(range(5)) + list2 = builder(["foo"]) + list3 = builder([("a", 5), ("d", 2), ("b", 8), ("c", 3)]) + + list1.append(5) + list1.append(1) + list1.append(2) + self.assertEquals([0, 1, 2, 3, 4, 5, 1, 2], list1) + + self.assertEquals(0, list1.count(6)) + self.assertEquals(2, list1.count(1)) + + list1.extend(range(5, 8)) + self.assertEquals([0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) + + self.assertEquals(1, list1.index(1)) + self.assertEquals(6, list1.index(1, 3)) + self.assertEquals(6, list1.index(1, 3, 7)) + self.assertRaises(ValueError, list1.index, 1, 3, 5) + + list1.insert(0, -1) + self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) + list1.insert(-1, 6.5) + self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7], list1) + list1.insert(13, 8) + self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7, 8], list1) + + self.assertEquals(8, list1.pop()) + self.assertEquals(7, list1.pop()) + self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5], list1) + self.assertEquals(-1, list1.pop(0)) + self.assertEquals(5, list1.pop(5)) + self.assertEquals(6.5, list1.pop(-1)) + self.assertEquals([0, 1, 2, 3, 4, 1, 2, 5, 6], list1) + self.assertEquals("foo", list2.pop()) + self.assertRaises(IndexError, list2.pop) + self.assertEquals([], list2) + + list1.remove(6) + self.assertEquals([0, 1, 2, 3, 4, 1, 2, 5], list1) + list1.remove(1) + self.assertEquals([0, 2, 3, 4, 1, 2, 5], list1) + list1.remove(1) + self.assertEquals([0, 2, 3, 4, 2, 5], list1) + self.assertRaises(ValueError, list1.remove, 1) + + list1.reverse() + self.assertEquals([5, 2, 4, 3, 2, 0], list1) + + list1.sort() + self.assertEquals([0, 2, 2, 3, 4, 5], list1) + list1.sort(reverse=True) + self.assertEquals([5, 4, 3, 2, 2, 0], list1) + list1.sort(cmp=lambda x, y: abs(3 - x) - 
abs(3 - y)) # Distance from 3 + self.assertEquals([3, 4, 2, 2, 5, 0], list1) + list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - y), reverse=True) + self.assertEquals([0, 5, 4, 2, 2, 3], list1) + list3.sort(key=lambda i: i[1]) + self.assertEquals([("d", 2), ("c", 3), ("a", 5), ("b", 8)], list3) + list3.sort(key=lambda i: i[1], reverse=True) + self.assertEquals([("b", 8), ("a", 5), ("c", 3), ("d", 2)], list3) + + def test_docs(self): + """make sure the methods of SmartList/_ListProxy have docstrings""" + methods = ["append", "count", "extend", "index", "insert", "pop", + "remove", "reverse", "sort"] + for meth in methods: + expected = getattr(list, meth).__doc__ + smartlist_doc = getattr(SmartList, meth).__doc__ + listproxy_doc = getattr(_ListProxy, meth).__doc__ + self.assertEquals(expected, smartlist_doc) + self.assertEquals(expected, listproxy_doc) + + def test_doctest(self): + """make sure the test embedded in SmartList's docstring passes""" + parent = SmartList([0, 1, 2, 3]) + self.assertEquals([0, 1, 2, 3], parent) + child = parent[2:] + self.assertEquals([2, 3], child) + child.append(4) + self.assertEquals([2, 3, 4], child) + self.assertEquals([0, 1, 2, 3, 4], parent) + + def test_parent_get_set_del(self): + """make sure SmartList's getitem/setitem/delitem work""" + self._test_get_set_del_item(lambda L: SmartList(L)) + + def test_parent_add(self): + """make sure SmartList's add/radd/iadd work""" + self._test_add_radd_iadd(lambda L: SmartList(L)) + + def test_parent_unaffected_magics(self): + """sanity checks against SmartList features that were not modified""" + self._test_other_magic_methods(lambda L: SmartList(L)) + def test_parent_methods(self): """make sure SmartList's non-magic methods work, like append()""" self._test_list_methods(lambda L: SmartList(L)) - def test_child_magics(self): - """make sure _ListProxy's magically implemented features work""" - pass - # if py3k: - # __str__ - # __bytes__ - # else: - # __unicode__ - # __str__ - # __repr__ - # 
__lt__ - # __le__ - # __eq__ - # __ne__ - # __gt__ - # __ge__ - # if py3k: - # __bool__ - # else: - # __nonzero__ - # __len__ - # __getitem__ - # __setitem__ - # __delitem__ - # __iter__ - # __reversed__ - # __contains__ - # if not py3k: - # __getslice__ - # __setslice__ - # __delslice__ - # __add__ - # __radd__ - # __iadd__ - # __mul__ - # __rmul__ - # __imul__ + def test_child_get_set_del(self): + """make sure _ListProxy's getitem/setitem/delitem work""" + self._test_get_set_del_item(lambda L: SmartList(list(L))[:]) + self._test_get_set_del_item(lambda L: SmartList([999] + list(L))[1:]) + # self._test_get_set_del_item(lambda L: SmartList(list(L) + [999])[:-1]) + # builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] + # self._test_get_set_del_item(builder) + + def test_child_add(self): + """make sure _ListProxy's add/radd/iadd work""" + self._test_add_radd_iadd(lambda L: SmartList(list(L))[:]) + self._test_add_radd_iadd(lambda L: SmartList([999] + list(L))[1:]) + self._test_add_radd_iadd(lambda L: SmartList(list(L) + [999])[:-1]) + builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] + self._test_add_radd_iadd(builder) + + def test_child_other_magics(self): + """make sure _ListProxy's other magically implemented features work""" + self._test_other_magic_methods(lambda L: SmartList(list(L))[:]) + self._test_other_magic_methods(lambda L: SmartList([999] + list(L))[1:]) + self._test_other_magic_methods(lambda L: SmartList(list(L) + [999])[:-1]) + builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] + self._test_other_magic_methods(builder) def test_child_methods(self): """make sure _ListProxy's non-magic methods work, like append()""" From b298a68b37444ff2674ce7699e1bc85d610df547 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 25 Mar 2013 16:54:01 -0400 Subject: [PATCH 049/115] Squash a bug dealing with extended slices. 
--- mwparserfromhell/smart_list.py | 11 ++++++----- tests/test_smart_list.py | 6 +++--- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/mwparserfromhell/smart_list.py b/mwparserfromhell/smart_list.py index 062e9ad..46c475a 100644 --- a/mwparserfromhell/smart_list.py +++ b/mwparserfromhell/smart_list.py @@ -87,8 +87,9 @@ class SmartList(list): return super(SmartList, self).__setitem__(key, item) item = list(item) super(SmartList, self).__setitem__(key, item) - key = slice(key.start or 0, maxsize if key.stop is None else key.stop) - diff = len(item) - key.stop + key.start + keystop = maxsize if key.stop is None else key.stop + key = slice(key.start or 0, keystop, key.step or 1) + diff = len(item) + (key.start - key.stop) / key.step values = self._children.values if py3k else self._children.itervalues if diff: for child, (start, stop, step) in values(): @@ -101,10 +102,10 @@ class SmartList(list): super(SmartList, self).__delitem__(key) if isinstance(key, slice): keystop = maxsize if key.stop is None else key.stop - key = slice(key.start or 0, keystop) + key = slice(key.start or 0, keystop, key.step or 1) else: - key = slice(key, key + 1) - diff = key.stop - key.start + key = slice(key, key + 1, 1) + diff = (key.stop - key.start) / key.step values = self._children.values if py3k else self._children.itervalues for child, (start, stop, step) in values(): if start > key.start: diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 777660a..10e39ea 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -306,9 +306,9 @@ class TestSmartList(unittest.TestCase): """make sure _ListProxy's getitem/setitem/delitem work""" self._test_get_set_del_item(lambda L: SmartList(list(L))[:]) self._test_get_set_del_item(lambda L: SmartList([999] + list(L))[1:]) - # self._test_get_set_del_item(lambda L: SmartList(list(L) + [999])[:-1]) - # builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] - # 
self._test_get_set_del_item(builder) + self._test_get_set_del_item(lambda L: SmartList(list(L) + [999])[:-1]) + builder = lambda L: SmartList([101, 102] + list(L) + [201, 202])[2:-2] + self._test_get_set_del_item(builder) def test_child_add(self): """make sure _ListProxy's add/radd/iadd work""" From b6284195d31543aca2a1d4e1742ce3f649217b14 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 25 Mar 2013 17:08:18 -0400 Subject: [PATCH 050/115] Implement first part of test_influence(). --- tests/test_smart_list.py | 49 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 10e39ea..b0a10cb 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -336,7 +336,54 @@ class TestSmartList(unittest.TestCase): def test_influence(self): """make sure changes are propagated from parents to children""" - pass + parent = SmartList([0, 1, 2, 3, 4, 5]) + child1 = parent[2:] + child2 = parent[2:5] + + parent.append(6) + child1.append(7) + child2.append(4.5) + self.assertEquals([0, 1, 2, 3, 4, 4.5, 5, 6, 7], parent) + self.assertEquals([2, 3, 4, 4.5, 5, 6, 7], child1) + self.assertEquals([2, 3, 4, 4.5], child2) + + parent.insert(0, -1) + parent.insert(4, 2.5) + parent.insert(10, 6.5) + self.assertEquals([-1, 0, 1, 2, 2.5, 3, 4, 4.5, 5, 6, 6.5, 7], parent) + self.assertEquals([2, 2.5, 3, 4, 4.5, 5, 6, 6.5, 7], child1) + self.assertEquals([2, 2.5, 3, 4, 4.5], child2) + + self.assertEquals(7, parent.pop()) + self.assertEquals(6.5, child1.pop()) + self.assertEquals(4.5, child2.pop()) + self.assertEquals([-1, 0, 1, 2, 2.5, 3, 4, 5, 6], parent) + self.assertEquals([2, 2.5, 3, 4, 5, 6], child1) + self.assertEquals([2, 2.5, 3, 4], child2) + + parent.remove(-1) + child1.remove(2.5) + self.assertEquals([0, 1, 2, 3, 4, 5, 6], parent) + self.assertEquals([2, 3, 4, 5, 6], child1) + self.assertEquals([2, 3, 4], child2) + + self.assertEquals(0, parent.pop(0)) + 
self.assertEquals([1, 2, 3, 4, 5, 6], parent) + self.assertEquals([2, 3, 4, 5, 6], child1) + self.assertEquals([2, 3, 4], child2) + + child2.reverse() + self.assertEquals([1, 4, 3, 2, 5, 6], parent) + self.assertEquals([4, 3, 2, 5, 6], child1) + self.assertEquals([4, 3, 2], child2) + + parent.extend([7, 8]) + child1.extend([8.1, 8.2]) + child2.extend([1.9, 1.8]) + self.assertEquals([1, 4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], parent) + self.assertEquals([4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], child1) + self.assertEquals([4, 3, 2, 1.9, 1.8], child2) + # also test whether children that exit scope are removed from parent's map if __name__ == "__main__": From 34b85a93cd425f3b9c1b2d91fa7d5625b284f171 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 25 Mar 2013 17:33:29 -0400 Subject: [PATCH 051/115] Cosmetic change: standardize whitespace after class definition. --- mwparserfromhell/nodes/argument.py | 1 + mwparserfromhell/nodes/comment.py | 1 + mwparserfromhell/nodes/text.py | 1 + mwparserfromhell/nodes/wikilink.py | 1 + mwparserfromhell/string_mixin.py | 1 + tests/_test_tokenizer.py | 1 + tests/test_ctokenizer.py | 1 + tests/test_docs.py | 1 + tests/test_pytokenizer.py | 1 + tests/test_smart_list.py | 2 -- tests/test_string_mixin.py | 1 + 11 files changed, 10 insertions(+), 2 deletions(-) diff --git a/mwparserfromhell/nodes/argument.py b/mwparserfromhell/nodes/argument.py index 06facb4..d7db92a 100644 --- a/mwparserfromhell/nodes/argument.py +++ b/mwparserfromhell/nodes/argument.py @@ -30,6 +30,7 @@ __all__ = ["Argument"] class Argument(Node): """Represents a template argument substitution, like ``{{{foo}}}``.""" + def __init__(self, name, default=None): super(Argument, self).__init__() self._name = name diff --git a/mwparserfromhell/nodes/comment.py b/mwparserfromhell/nodes/comment.py index b34c29e..e96ce38 100644 --- a/mwparserfromhell/nodes/comment.py +++ b/mwparserfromhell/nodes/comment.py @@ -29,6 +29,7 @@ __all__ = ["Comment"] class Comment(Node): 
"""Represents a hidden HTML comment, like ````.""" + def __init__(self, contents): super(Comment, self).__init__() self._contents = contents diff --git a/mwparserfromhell/nodes/text.py b/mwparserfromhell/nodes/text.py index 60ba847..6fda3da 100644 --- a/mwparserfromhell/nodes/text.py +++ b/mwparserfromhell/nodes/text.py @@ -29,6 +29,7 @@ __all__ = ["Text"] class Text(Node): """Represents ordinary, unformatted text with no special properties.""" + def __init__(self, value): super(Text, self).__init__() self._value = value diff --git a/mwparserfromhell/nodes/wikilink.py b/mwparserfromhell/nodes/wikilink.py index f880016..6fea468 100644 --- a/mwparserfromhell/nodes/wikilink.py +++ b/mwparserfromhell/nodes/wikilink.py @@ -30,6 +30,7 @@ __all__ = ["Wikilink"] class Wikilink(Node): """Represents an internal wikilink, like ``[[Foo|Bar]]``.""" + def __init__(self, title, text=None): super(Wikilink, self).__init__() self._title = title diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index efd28d8..eee58b9 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -50,6 +50,7 @@ class StringMixIn(object): :py:meth:`__unicode__` instead of the immutable ``self`` like the regular ``str`` type. """ + if py3k: def __str__(self): return self.__unicode__() diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 4d12dc9..379b4fa 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -38,6 +38,7 @@ class TokenizerTestCase(object): TestCTokenizer. Tests are loaded dynamically from files in the 'tokenizer' directory. """ + @classmethod def _build_test_method(cls, funcname, data): """Create and return a method to be treated as a test case method. 
diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index 07b5290..4dbeceb 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -27,6 +27,7 @@ from _test_tokenizer import TokenizerTestCase class TestCTokenizer(TokenizerTestCase, unittest.TestCase): """Test cases for the C tokenizer.""" + @classmethod def setUpClass(cls): from mwparserfromhell.parser._tokenizer import CTokenizer diff --git a/tests/test_docs.py b/tests/test_docs.py index 8673cb9..075b0a7 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -30,6 +30,7 @@ from mwparserfromhell.compat import py3k, str, StringIO class TestDocs(unittest.TestCase): """Integration test cases for mwparserfromhell's documentation.""" + def assertPrint(self, input, output): """Assertion check that *input*, when printed, produces *output*.""" buff = StringIO() diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index a2f2482..73e6fe7 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -27,6 +27,7 @@ from _test_tokenizer import TokenizerTestCase class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): """Test cases for the Python tokenizer.""" + @classmethod def setUpClass(cls): from mwparserfromhell.parser.tokenizer import Tokenizer diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index b0a10cb..f6d22ae 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -384,7 +384,5 @@ class TestSmartList(unittest.TestCase): self.assertEquals([4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], child1) self.assertEquals([4, 3, 2, 1.9, 1.8], child2) - # also test whether children that exit scope are removed from parent's map - if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 8d86c8e..7b99995 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -37,6 +37,7 @@ class _FakeString(StringMixIn): class TestStringMixIn(unittest.TestCase): 
"""Test cases for the StringMixIn class.""" + def test_docs(self): """make sure the various methods of StringMixIn have docstrings""" methods = [ From 6a741db7ce98239108f21004b2a9d2f99a63f90f Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 25 Mar 2013 18:25:03 -0400 Subject: [PATCH 052/115] Applying fb71f5507eca7bc73fae764549a7579889817cba --- mwparserfromhell/parser/__init__.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/mwparserfromhell/parser/__init__.py b/mwparserfromhell/parser/__init__.py index 074b9ba..1fb95b5 100644 --- a/mwparserfromhell/parser/__init__.py +++ b/mwparserfromhell/parser/__init__.py @@ -26,16 +26,16 @@ modules: the :py:mod:`~.tokenizer` and the :py:mod:`~.builder`. This module joins them together under one interface. """ +from .builder import Builder +from .tokenizer import Tokenizer try: - from ._builder import CBuilder as Builder + from ._tokenizer import CTokenizer + use_c = True except ImportError: - from .builder import Builder -try: - from ._tokenizer import CTokenizer as Tokenizer -except ImportError: - from .tokenizer import Tokenizer + CTokenizer = None + use_c = False -__all__ = ["Parser"] +__all__ = ["use_c", "Parser"] class Parser(object): """Represents a parser for wikicode. @@ -48,7 +48,10 @@ class Parser(object): def __init__(self, text): self.text = text - self._tokenizer = Tokenizer() + if use_c and CTokenizer: + self._tokenizer = CTokenizer() + else: + self._tokenizer = Tokenizer() self._builder = Builder() def parse(self): From 9e26264d6b8d462cd93bc4c475c91abfe6d3b501 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 25 Mar 2013 19:13:32 -0400 Subject: [PATCH 053/115] Replace deprecated alias assertEquals() with assertEqual(). 
--- tests/test_smart_list.py | 244 ++++++++++++++++++++++----------------------- tests/test_string_mixin.py | 228 +++++++++++++++++++++--------------------- tests/test_tokens.py | 24 ++--- 3 files changed, 248 insertions(+), 248 deletions(-) diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index f6d22ae..680de9d 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -39,80 +39,80 @@ class TestSmartList(unittest.TestCase): list1 = builder([0, 1, 2, 3, "one", "two"]) list2 = builder(list(range(10))) - self.assertEquals(1, list1[1]) - self.assertEquals("one", list1[-2]) - self.assertEquals([2, 3], list1[2:4]) + self.assertEqual(1, list1[1]) + self.assertEqual("one", list1[-2]) + self.assertEqual([2, 3], list1[2:4]) self.assertRaises(IndexError, lambda: list1[6]) self.assertRaises(IndexError, lambda: list1[-7]) - self.assertEquals([0, 1, 2], list1[:3]) - self.assertEquals([0, 1, 2, 3, "one", "two"], list1[:]) - self.assertEquals([3, "one", "two"], list1[3:]) - self.assertEquals(["one", "two"], list1[-2:]) - self.assertEquals([0, 1], list1[:-4]) - self.assertEquals([], list1[6:]) - self.assertEquals([], list1[4:2]) - - self.assertEquals([0, 2, "one"], list1[0:5:2]) - self.assertEquals([0, 2], list1[0:-3:2]) - self.assertEquals([0, 1, 2, 3, "one", "two"], list1[::]) - self.assertEquals([2, 3, "one", "two"], list1[2::]) - self.assertEquals([0, 1, 2, 3], list1[:4:]) - self.assertEquals([2, 3], list1[2:4:]) - self.assertEquals([0, 2, 4, 6, 8], list2[::2]) - self.assertEquals([2, 5, 8], list2[2::3]) - self.assertEquals([0, 3], list2[:6:3]) - self.assertEquals([2, 5, 8], list2[-8:9:3]) - self.assertEquals([], list2[100000:1000:-100]) + self.assertEqual([0, 1, 2], list1[:3]) + self.assertEqual([0, 1, 2, 3, "one", "two"], list1[:]) + self.assertEqual([3, "one", "two"], list1[3:]) + self.assertEqual(["one", "two"], list1[-2:]) + self.assertEqual([0, 1], list1[:-4]) + self.assertEqual([], list1[6:]) + self.assertEqual([], list1[4:2]) + + 
self.assertEqual([0, 2, "one"], list1[0:5:2]) + self.assertEqual([0, 2], list1[0:-3:2]) + self.assertEqual([0, 1, 2, 3, "one", "two"], list1[::]) + self.assertEqual([2, 3, "one", "two"], list1[2::]) + self.assertEqual([0, 1, 2, 3], list1[:4:]) + self.assertEqual([2, 3], list1[2:4:]) + self.assertEqual([0, 2, 4, 6, 8], list2[::2]) + self.assertEqual([2, 5, 8], list2[2::3]) + self.assertEqual([0, 3], list2[:6:3]) + self.assertEqual([2, 5, 8], list2[-8:9:3]) + self.assertEqual([], list2[100000:1000:-100]) list1[3] = 100 - self.assertEquals(100, list1[3]) + self.assertEqual(100, list1[3]) list1[-3] = 101 - self.assertEquals([0, 1, 2, 101, "one", "two"], list1) + self.assertEqual([0, 1, 2, 101, "one", "two"], list1) list1[5:] = [6, 7, 8] - self.assertEquals([6, 7, 8], list1[5:]) - self.assertEquals([0, 1, 2, 101, "one", 6, 7, 8], list1) + self.assertEqual([6, 7, 8], list1[5:]) + self.assertEqual([0, 1, 2, 101, "one", 6, 7, 8], list1) list1[2:4] = [-1, -2, -3, -4, -5] - self.assertEquals([0, 1, -1, -2, -3, -4, -5, "one", 6, 7, 8], list1) + self.assertEqual([0, 1, -1, -2, -3, -4, -5, "one", 6, 7, 8], list1) list1[0:-3] = [99] - self.assertEquals([99, 6, 7, 8], list1) + self.assertEqual([99, 6, 7, 8], list1) list2[0:6:2] = [100, 102, 104] - self.assertEquals([100, 1, 102, 3, 104, 5, 6, 7, 8, 9], list2) + self.assertEqual([100, 1, 102, 3, 104, 5, 6, 7, 8, 9], list2) list2[::3] = [200, 203, 206, 209] - self.assertEquals([200, 1, 102, 203, 104, 5, 206, 7, 8, 209], list2) + self.assertEqual([200, 1, 102, 203, 104, 5, 206, 7, 8, 209], list2) list2[::] = range(7) - self.assertEquals([0, 1, 2, 3, 4, 5, 6], list2) + self.assertEqual([0, 1, 2, 3, 4, 5, 6], list2) self.assertRaises(ValueError, assign, list2, 0, 5, 2, [100, 102, 104, 106]) del list2[2] - self.assertEquals([0, 1, 3, 4, 5, 6], list2) + self.assertEqual([0, 1, 3, 4, 5, 6], list2) del list2[-3] - self.assertEquals([0, 1, 3, 5, 6], list2) + self.assertEqual([0, 1, 3, 5, 6], list2) self.assertRaises(IndexError, delete, 
list2, 100) self.assertRaises(IndexError, delete, list2, -6) list2[:] = range(10) del list2[3:6] - self.assertEquals([0, 1, 2, 6, 7, 8, 9], list2) + self.assertEqual([0, 1, 2, 6, 7, 8, 9], list2) del list2[-2:] - self.assertEquals([0, 1, 2, 6, 7], list2) + self.assertEqual([0, 1, 2, 6, 7], list2) del list2[:2] - self.assertEquals([2, 6, 7], list2) + self.assertEqual([2, 6, 7], list2) list2[:] = range(10) del list2[2:8:2] - self.assertEquals([0, 1, 3, 5, 7, 8, 9], list2) + self.assertEqual([0, 1, 3, 5, 7, 8, 9], list2) def _test_add_radd_iadd(self, builder): """Run tests on __r/i/add__ of a list built with *builder*.""" list1 = builder(range(5)) list2 = builder(range(5, 10)) - self.assertEquals([0, 1, 2, 3, 4, 5, 6], list1 + [5, 6]) - self.assertEquals([0, 1, 2, 3, 4], list1) - self.assertEquals(list(range(10)), list1 + list2) - self.assertEquals([-2, -1, 0, 1, 2, 3, 4], [-2, -1] + list1) - self.assertEquals([0, 1, 2, 3, 4], list1) + self.assertEqual([0, 1, 2, 3, 4, 5, 6], list1 + [5, 6]) + self.assertEqual([0, 1, 2, 3, 4], list1) + self.assertEqual(list(range(10)), list1 + list2) + self.assertEqual([-2, -1, 0, 1, 2, 3, 4], [-2, -1] + list1) + self.assertEqual([0, 1, 2, 3, 4], list1) list1 += ["foo", "bar", "baz"] - self.assertEquals([0, 1, 2, 3, 4, "foo", "bar", "baz"], list1) + self.assertEqual([0, 1, 2, 3, 4, "foo", "bar", "baz"], list1) def _test_other_magic_methods(self, builder): """Run tests on other magic methods of a list built with *builder*.""" @@ -122,13 +122,13 @@ class TestSmartList(unittest.TestCase): list4 = builder([0, 1, 2]) if py3k: - self.assertEquals("[0, 1, 2, 3, 'one', 'two']", str(list1)) - self.assertEquals(b"[0, 1, 2, 3, 'one', 'two']", bytes(list1)) - self.assertEquals("[0, 1, 2, 3, 'one', 'two']", repr(list1)) + self.assertEqual("[0, 1, 2, 3, 'one', 'two']", str(list1)) + self.assertEqual(b"[0, 1, 2, 3, 'one', 'two']", bytes(list1)) + self.assertEqual("[0, 1, 2, 3, 'one', 'two']", repr(list1)) else: - self.assertEquals("[0, 1, 2, 3, 
u'one', u'two']", unicode(list1)) - self.assertEquals(b"[0, 1, 2, 3, u'one', u'two']", str(list1)) - self.assertEquals(b"[0, 1, 2, 3, u'one', u'two']", repr(list1)) + self.assertEqual("[0, 1, 2, 3, u'one', u'two']", unicode(list1)) + self.assertEqual(b"[0, 1, 2, 3, u'one', u'two']", str(list1)) + self.assertEqual(b"[0, 1, 2, 3, u'one', u'two']", repr(list1)) self.assertTrue(list1 < list3) self.assertTrue(list1 <= list3) @@ -164,42 +164,42 @@ class TestSmartList(unittest.TestCase): self.assertTrue(bool(list1)) self.assertFalse(bool(list2)) - self.assertEquals(6, len(list1)) - self.assertEquals(0, len(list2)) + self.assertEqual(6, len(list1)) + self.assertEqual(0, len(list2)) out = [] for obj in list1: out.append(obj) - self.assertEquals([0, 1, 2, 3, "one", "two"], out) + self.assertEqual([0, 1, 2, 3, "one", "two"], out) out = [] for ch in list2: out.append(ch) - self.assertEquals([], out) + self.assertEqual([], out) gen1 = iter(list1) out = [] for i in range(len(list1)): out.append(gen1.next()) self.assertRaises(StopIteration, gen1.next) - self.assertEquals([0, 1, 2, 3, "one", "two"], out) + self.assertEqual([0, 1, 2, 3, "one", "two"], out) gen2 = iter(list2) self.assertRaises(StopIteration, gen2.next) - self.assertEquals(["two", "one", 3, 2, 1, 0], list(reversed(list1))) - self.assertEquals([], list(reversed(list2))) + self.assertEqual(["two", "one", 3, 2, 1, 0], list(reversed(list1))) + self.assertEqual([], list(reversed(list2))) self.assertTrue("one" in list1) self.assertTrue(3 in list1) self.assertFalse(10 in list1) self.assertFalse(0 in list2) - self.assertEquals([], list2 * 5) - self.assertEquals([], 5 * list2) - self.assertEquals([0, 1, 2, 0, 1, 2, 0, 1, 2], list4 * 3) - self.assertEquals([0, 1, 2, 0, 1, 2, 0, 1, 2], 3 * list4) + self.assertEqual([], list2 * 5) + self.assertEqual([], 5 * list2) + self.assertEqual([0, 1, 2, 0, 1, 2, 0, 1, 2], list4 * 3) + self.assertEqual([0, 1, 2, 0, 1, 2, 0, 1, 2], 3 * list4) list4 *= 2 - self.assertEquals([0, 1, 2, 0, 1, 
2], list4) + self.assertEqual([0, 1, 2, 0, 1, 2], list4) def _test_list_methods(self, builder): """Run tests on the public methods of a list built with *builder*.""" @@ -210,60 +210,60 @@ class TestSmartList(unittest.TestCase): list1.append(5) list1.append(1) list1.append(2) - self.assertEquals([0, 1, 2, 3, 4, 5, 1, 2], list1) + self.assertEqual([0, 1, 2, 3, 4, 5, 1, 2], list1) - self.assertEquals(0, list1.count(6)) - self.assertEquals(2, list1.count(1)) + self.assertEqual(0, list1.count(6)) + self.assertEqual(2, list1.count(1)) list1.extend(range(5, 8)) - self.assertEquals([0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) + self.assertEqual([0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) - self.assertEquals(1, list1.index(1)) - self.assertEquals(6, list1.index(1, 3)) - self.assertEquals(6, list1.index(1, 3, 7)) + self.assertEqual(1, list1.index(1)) + self.assertEqual(6, list1.index(1, 3)) + self.assertEqual(6, list1.index(1, 3, 7)) self.assertRaises(ValueError, list1.index, 1, 3, 5) list1.insert(0, -1) - self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) + self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 7], list1) list1.insert(-1, 6.5) - self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7], list1) + self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7], list1) list1.insert(13, 8) - self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7, 8], list1) - - self.assertEquals(8, list1.pop()) - self.assertEquals(7, list1.pop()) - self.assertEquals([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5], list1) - self.assertEquals(-1, list1.pop(0)) - self.assertEquals(5, list1.pop(5)) - self.assertEquals(6.5, list1.pop(-1)) - self.assertEquals([0, 1, 2, 3, 4, 1, 2, 5, 6], list1) - self.assertEquals("foo", list2.pop()) + self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5, 7, 8], list1) + + self.assertEqual(8, list1.pop()) + self.assertEqual(7, list1.pop()) + self.assertEqual([-1, 0, 1, 2, 3, 4, 5, 1, 2, 5, 6, 6.5], list1) + self.assertEqual(-1, list1.pop(0)) + 
self.assertEqual(5, list1.pop(5)) + self.assertEqual(6.5, list1.pop(-1)) + self.assertEqual([0, 1, 2, 3, 4, 1, 2, 5, 6], list1) + self.assertEqual("foo", list2.pop()) self.assertRaises(IndexError, list2.pop) - self.assertEquals([], list2) + self.assertEqual([], list2) list1.remove(6) - self.assertEquals([0, 1, 2, 3, 4, 1, 2, 5], list1) + self.assertEqual([0, 1, 2, 3, 4, 1, 2, 5], list1) list1.remove(1) - self.assertEquals([0, 2, 3, 4, 1, 2, 5], list1) + self.assertEqual([0, 2, 3, 4, 1, 2, 5], list1) list1.remove(1) - self.assertEquals([0, 2, 3, 4, 2, 5], list1) + self.assertEqual([0, 2, 3, 4, 2, 5], list1) self.assertRaises(ValueError, list1.remove, 1) list1.reverse() - self.assertEquals([5, 2, 4, 3, 2, 0], list1) + self.assertEqual([5, 2, 4, 3, 2, 0], list1) list1.sort() - self.assertEquals([0, 2, 2, 3, 4, 5], list1) + self.assertEqual([0, 2, 2, 3, 4, 5], list1) list1.sort(reverse=True) - self.assertEquals([5, 4, 3, 2, 2, 0], list1) + self.assertEqual([5, 4, 3, 2, 2, 0], list1) list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - y)) # Distance from 3 - self.assertEquals([3, 4, 2, 2, 5, 0], list1) + self.assertEqual([3, 4, 2, 2, 5, 0], list1) list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - y), reverse=True) - self.assertEquals([0, 5, 4, 2, 2, 3], list1) + self.assertEqual([0, 5, 4, 2, 2, 3], list1) list3.sort(key=lambda i: i[1]) - self.assertEquals([("d", 2), ("c", 3), ("a", 5), ("b", 8)], list3) + self.assertEqual([("d", 2), ("c", 3), ("a", 5), ("b", 8)], list3) list3.sort(key=lambda i: i[1], reverse=True) - self.assertEquals([("b", 8), ("a", 5), ("c", 3), ("d", 2)], list3) + self.assertEqual([("b", 8), ("a", 5), ("c", 3), ("d", 2)], list3) def test_docs(self): """make sure the methods of SmartList/_ListProxy have docstrings""" @@ -273,18 +273,18 @@ class TestSmartList(unittest.TestCase): expected = getattr(list, meth).__doc__ smartlist_doc = getattr(SmartList, meth).__doc__ listproxy_doc = getattr(_ListProxy, meth).__doc__ - self.assertEquals(expected, 
smartlist_doc) - self.assertEquals(expected, listproxy_doc) + self.assertEqual(expected, smartlist_doc) + self.assertEqual(expected, listproxy_doc) def test_doctest(self): """make sure the test embedded in SmartList's docstring passes""" parent = SmartList([0, 1, 2, 3]) - self.assertEquals([0, 1, 2, 3], parent) + self.assertEqual([0, 1, 2, 3], parent) child = parent[2:] - self.assertEquals([2, 3], child) + self.assertEqual([2, 3], child) child.append(4) - self.assertEquals([2, 3, 4], child) - self.assertEquals([0, 1, 2, 3, 4], parent) + self.assertEqual([2, 3, 4], child) + self.assertEqual([0, 1, 2, 3, 4], parent) def test_parent_get_set_del(self): """make sure SmartList's getitem/setitem/delitem work""" @@ -343,46 +343,46 @@ class TestSmartList(unittest.TestCase): parent.append(6) child1.append(7) child2.append(4.5) - self.assertEquals([0, 1, 2, 3, 4, 4.5, 5, 6, 7], parent) - self.assertEquals([2, 3, 4, 4.5, 5, 6, 7], child1) - self.assertEquals([2, 3, 4, 4.5], child2) + self.assertEqual([0, 1, 2, 3, 4, 4.5, 5, 6, 7], parent) + self.assertEqual([2, 3, 4, 4.5, 5, 6, 7], child1) + self.assertEqual([2, 3, 4, 4.5], child2) parent.insert(0, -1) parent.insert(4, 2.5) parent.insert(10, 6.5) - self.assertEquals([-1, 0, 1, 2, 2.5, 3, 4, 4.5, 5, 6, 6.5, 7], parent) - self.assertEquals([2, 2.5, 3, 4, 4.5, 5, 6, 6.5, 7], child1) - self.assertEquals([2, 2.5, 3, 4, 4.5], child2) + self.assertEqual([-1, 0, 1, 2, 2.5, 3, 4, 4.5, 5, 6, 6.5, 7], parent) + self.assertEqual([2, 2.5, 3, 4, 4.5, 5, 6, 6.5, 7], child1) + self.assertEqual([2, 2.5, 3, 4, 4.5], child2) - self.assertEquals(7, parent.pop()) - self.assertEquals(6.5, child1.pop()) - self.assertEquals(4.5, child2.pop()) - self.assertEquals([-1, 0, 1, 2, 2.5, 3, 4, 5, 6], parent) - self.assertEquals([2, 2.5, 3, 4, 5, 6], child1) - self.assertEquals([2, 2.5, 3, 4], child2) + self.assertEqual(7, parent.pop()) + self.assertEqual(6.5, child1.pop()) + self.assertEqual(4.5, child2.pop()) + self.assertEqual([-1, 0, 1, 2, 2.5, 3, 4, 5, 
6], parent) + self.assertEqual([2, 2.5, 3, 4, 5, 6], child1) + self.assertEqual([2, 2.5, 3, 4], child2) parent.remove(-1) child1.remove(2.5) - self.assertEquals([0, 1, 2, 3, 4, 5, 6], parent) - self.assertEquals([2, 3, 4, 5, 6], child1) - self.assertEquals([2, 3, 4], child2) + self.assertEqual([0, 1, 2, 3, 4, 5, 6], parent) + self.assertEqual([2, 3, 4, 5, 6], child1) + self.assertEqual([2, 3, 4], child2) - self.assertEquals(0, parent.pop(0)) - self.assertEquals([1, 2, 3, 4, 5, 6], parent) - self.assertEquals([2, 3, 4, 5, 6], child1) - self.assertEquals([2, 3, 4], child2) + self.assertEqual(0, parent.pop(0)) + self.assertEqual([1, 2, 3, 4, 5, 6], parent) + self.assertEqual([2, 3, 4, 5, 6], child1) + self.assertEqual([2, 3, 4], child2) child2.reverse() - self.assertEquals([1, 4, 3, 2, 5, 6], parent) - self.assertEquals([4, 3, 2, 5, 6], child1) - self.assertEquals([4, 3, 2], child2) + self.assertEqual([1, 4, 3, 2, 5, 6], parent) + self.assertEqual([4, 3, 2, 5, 6], child1) + self.assertEqual([4, 3, 2], child2) parent.extend([7, 8]) child1.extend([8.1, 8.2]) child2.extend([1.9, 1.8]) - self.assertEquals([1, 4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], parent) - self.assertEquals([4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], child1) - self.assertEquals([4, 3, 2, 1.9, 1.8], child2) + self.assertEqual([1, 4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], parent) + self.assertEqual([4, 3, 2, 1.9, 1.8, 5, 6, 7, 8, 8.1, 8.2], child1) + self.assertEqual([4, 3, 2, 1.9, 1.8], child2) if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 7b99995..6ef6344 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -56,17 +56,17 @@ class TestStringMixIn(unittest.TestCase): for meth in methods: expected = getattr(str, meth).__doc__ actual = getattr(StringMixIn, meth).__doc__ - self.assertEquals(expected, actual) + self.assertEqual(expected, actual) def test_types(self): """make sure StringMixIns convert to 
different types correctly""" fstr = _FakeString("fake string") - self.assertEquals(str(fstr), "fake string") - self.assertEquals(bytes(fstr), b"fake string") + self.assertEqual(str(fstr), "fake string") + self.assertEqual(bytes(fstr), b"fake string") if py3k: - self.assertEquals(repr(fstr), "'fake string'") + self.assertEqual(repr(fstr), "'fake string'") else: - self.assertEquals(repr(fstr), b"u'fake string'") + self.assertEqual(repr(fstr), b"u'fake string'") self.assertIsInstance(str(fstr), str) self.assertIsInstance(bytes(fstr), bytes) @@ -119,18 +119,18 @@ class TestStringMixIn(unittest.TestCase): self.assertTrue(str1) self.assertFalse(str2) - self.assertEquals(11, len(str1)) - self.assertEquals(0, len(str2)) + self.assertEqual(11, len(str1)) + self.assertEqual(0, len(str2)) out = [] for ch in str1: out.append(ch) - self.assertEquals(expected, out) + self.assertEqual(expected, out) out = [] for ch in str2: out.append(ch) - self.assertEquals([], out) + self.assertEqual([], out) gen1 = iter(str1) gen2 = iter(str2) @@ -141,16 +141,16 @@ class TestStringMixIn(unittest.TestCase): for i in range(len(str1)): out.append(gen1.next()) self.assertRaises(StopIteration, gen1.next) - self.assertEquals(expected, out) + self.assertEqual(expected, out) self.assertRaises(StopIteration, gen2.next) - self.assertEquals("gnirts ekaf", "".join(list(reversed(str1)))) - self.assertEquals([], list(reversed(str2))) + self.assertEqual("gnirts ekaf", "".join(list(reversed(str1)))) + self.assertEqual([], list(reversed(str2))) - self.assertEquals("f", str1[0]) - self.assertEquals(" ", str1[4]) - self.assertEquals("g", str1[10]) - self.assertEquals("n", str1[-2]) + self.assertEqual("f", str1[0]) + self.assertEqual(" ", str1[4]) + self.assertEqual("g", str1[10]) + self.assertEqual("n", str1[-2]) self.assertRaises(IndexError, lambda: str1[11]) self.assertRaises(IndexError, lambda: str2[0]) @@ -165,75 +165,75 @@ class TestStringMixIn(unittest.TestCase): def test_other_methods(self): """test the 
remaining non-magic methods of StringMixIn""" str1 = _FakeString("fake string") - self.assertEquals("Fake string", str1.capitalize()) + self.assertEqual("Fake string", str1.capitalize()) - self.assertEquals(" fake string ", str1.center(15)) - self.assertEquals(" fake string ", str1.center(16)) - self.assertEquals("qqfake stringqq", str1.center(15, "q")) + self.assertEqual(" fake string ", str1.center(15)) + self.assertEqual(" fake string ", str1.center(16)) + self.assertEqual("qqfake stringqq", str1.center(15, "q")) - self.assertEquals(1, str1.count("e")) - self.assertEquals(0, str1.count("z")) - self.assertEquals(1, str1.count("r", 7)) - self.assertEquals(0, str1.count("r", 8)) - self.assertEquals(1, str1.count("r", 5, 9)) - self.assertEquals(0, str1.count("r", 5, 7)) + self.assertEqual(1, str1.count("e")) + self.assertEqual(0, str1.count("z")) + self.assertEqual(1, str1.count("r", 7)) + self.assertEqual(0, str1.count("r", 8)) + self.assertEqual(1, str1.count("r", 5, 9)) + self.assertEqual(0, str1.count("r", 5, 7)) if not py3k: str2 = _FakeString("fo") - self.assertEquals(str1, str1.decode()) + self.assertEqual(str1, str1.decode()) actual = _FakeString("\\U00010332\\U0001033f\\U00010344") - self.assertEquals("𐌲𐌿𐍄", actual.decode("unicode_escape")) + self.assertEqual("𐌲𐌿𐍄", actual.decode("unicode_escape")) self.assertRaises(UnicodeError, str2.decode, "punycode") - self.assertEquals("", str2.decode("punycode", "ignore")) + self.assertEqual("", str2.decode("punycode", "ignore")) str3 = _FakeString("𐌲𐌿𐍄") - self.assertEquals(b"fake string", str1.encode()) - self.assertEquals(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", + self.assertEqual(b"fake string", str1.encode()) + self.assertEqual(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", str3.encode("utf8")) - self.assertEquals(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", + self.assertEqual(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", str3.encode(encoding="utf8")) 
self.assertRaises(UnicodeEncodeError, str3.encode) self.assertRaises(UnicodeEncodeError, str3.encode, "ascii") self.assertRaises(UnicodeEncodeError, str3.encode, "ascii", "strict") self.assertRaises(UnicodeEncodeError, str3.encode, errors="strict") - self.assertEquals("", str3.encode("ascii", "ignore")) - self.assertEquals("", str3.encode(errors="ignore")) + self.assertEqual("", str3.encode("ascii", "ignore")) + self.assertEqual("", str3.encode(errors="ignore")) self.assertTrue(str1.endswith("ing")) self.assertFalse(str1.endswith("ingh")) str4 = _FakeString("\tfoobar") - self.assertEquals("fake string", str1) - self.assertEquals(" foobar", str4.expandtabs()) - self.assertEquals(" foobar", str4.expandtabs(4)) + self.assertEqual("fake string", str1) + self.assertEqual(" foobar", str4.expandtabs()) + self.assertEqual(" foobar", str4.expandtabs(4)) - self.assertEquals(3, str1.find("e")) - self.assertEquals(-1, str1.find("z")) - self.assertEquals(7, str1.find("r", 7)) - self.assertEquals(-1, str1.find("r", 8)) - self.assertEquals(7, str1.find("r", 5, 9)) - self.assertEquals(-1, str1.find("r", 5, 7)) + self.assertEqual(3, str1.find("e")) + self.assertEqual(-1, str1.find("z")) + self.assertEqual(7, str1.find("r", 7)) + self.assertEqual(-1, str1.find("r", 8)) + self.assertEqual(7, str1.find("r", 5, 9)) + self.assertEqual(-1, str1.find("r", 5, 7)) str5 = _FakeString("foo{0}baz") str6 = _FakeString("foo{abc}baz") str7 = _FakeString("foo{0}{abc}buzz") str8 = _FakeString("{0}{1}") - self.assertEquals("fake string", str1.format()) - self.assertEquals("foobarbaz", str5.format("bar")) - self.assertEquals("foobarbaz", str6.format(abc="bar")) - self.assertEquals("foobarbazbuzz", str7.format("bar", abc="baz")) + self.assertEqual("fake string", str1.format()) + self.assertEqual("foobarbaz", str5.format("bar")) + self.assertEqual("foobarbaz", str6.format(abc="bar")) + self.assertEqual("foobarbazbuzz", str7.format("bar", abc="baz")) self.assertRaises(IndexError, str8.format, "abc") if 
py3k: - self.assertEquals("fake string", str1.format_map({})) - self.assertEquals("foobarbaz", str6.format_map({"abc": "bar"})) + self.assertEqual("fake string", str1.format_map({})) + self.assertEqual("foobarbaz", str6.format_map({"abc": "bar"})) self.assertRaises(ValueError, str5.format_map, {0: "abc"}) - self.assertEquals(3, str1.index("e")) + self.assertEqual(3, str1.index("e")) self.assertRaises(ValueError, str1.index, "z") - self.assertEquals(7, str1.index("r", 7)) + self.assertEqual(7, str1.index("r", 7)) self.assertRaises(ValueError, str1.index, "r", 8) - self.assertEquals(7, str1.index("r", 5, 9)) + self.assertEqual(7, str1.index("r", 5, 9)) self.assertRaises(ValueError, str1.index, "r", 5, 7) str9 = _FakeString("foobar") @@ -303,120 +303,120 @@ class TestStringMixIn(unittest.TestCase): self.assertFalse(str15.isupper()) self.assertTrue(str21.isupper()) - self.assertEquals("foobar", str15.join(["foo", "bar"])) - self.assertEquals("foo123bar123baz", str12.join(("foo", "bar", "baz"))) + self.assertEqual("foobar", str15.join(["foo", "bar"])) + self.assertEqual("foo123bar123baz", str12.join(("foo", "bar", "baz"))) - self.assertEquals("fake string ", str1.ljust(15)) - self.assertEquals("fake string ", str1.ljust(16)) - self.assertEquals("fake stringqqqq", str1.ljust(15, "q")) + self.assertEqual("fake string ", str1.ljust(15)) + self.assertEqual("fake string ", str1.ljust(16)) + self.assertEqual("fake stringqqqq", str1.ljust(15, "q")) str22 = _FakeString("ß") - self.assertEquals("", str15.lower()) - self.assertEquals("foobar", str16.lower()) - self.assertEquals("ß", str22.lower()) + self.assertEqual("", str15.lower()) + self.assertEqual("foobar", str16.lower()) + self.assertEqual("ß", str22.lower()) if py3k: - self.assertEquals("", str15.casefold()) - self.assertEquals("foobar", str16.casefold()) - self.assertEquals("ss", str22.casefold()) + self.assertEqual("", str15.casefold()) + self.assertEqual("foobar", str16.casefold()) + self.assertEqual("ss", 
str22.casefold()) str23 = _FakeString(" fake string ") - self.assertEquals("fake string", str1.lstrip()) - self.assertEquals("fake string ", str23.lstrip()) - self.assertEquals("ke string", str1.lstrip("abcdef")) + self.assertEqual("fake string", str1.lstrip()) + self.assertEqual("fake string ", str23.lstrip()) + self.assertEqual("ke string", str1.lstrip("abcdef")) - self.assertEquals(("fa", "ke", " string"), str1.partition("ke")) - self.assertEquals(("fake string", "", ""), str1.partition("asdf")) + self.assertEqual(("fa", "ke", " string"), str1.partition("ke")) + self.assertEqual(("fake string", "", ""), str1.partition("asdf")) str24 = _FakeString("boo foo moo") - self.assertEquals("real string", str1.replace("fake", "real")) - self.assertEquals("bu fu moo", str24.replace("oo", "u", 2)) + self.assertEqual("real string", str1.replace("fake", "real")) + self.assertEqual("bu fu moo", str24.replace("oo", "u", 2)) - self.assertEquals(3, str1.rfind("e")) - self.assertEquals(-1, str1.rfind("z")) - self.assertEquals(7, str1.rfind("r", 7)) - self.assertEquals(-1, str1.rfind("r", 8)) - self.assertEquals(7, str1.rfind("r", 5, 9)) - self.assertEquals(-1, str1.rfind("r", 5, 7)) + self.assertEqual(3, str1.rfind("e")) + self.assertEqual(-1, str1.rfind("z")) + self.assertEqual(7, str1.rfind("r", 7)) + self.assertEqual(-1, str1.rfind("r", 8)) + self.assertEqual(7, str1.rfind("r", 5, 9)) + self.assertEqual(-1, str1.rfind("r", 5, 7)) - self.assertEquals(3, str1.rindex("e")) + self.assertEqual(3, str1.rindex("e")) self.assertRaises(ValueError, str1.rindex, "z") - self.assertEquals(7, str1.rindex("r", 7)) + self.assertEqual(7, str1.rindex("r", 7)) self.assertRaises(ValueError, str1.rindex, "r", 8) - self.assertEquals(7, str1.rindex("r", 5, 9)) + self.assertEqual(7, str1.rindex("r", 5, 9)) self.assertRaises(ValueError, str1.rindex, "r", 5, 7) - self.assertEquals(" fake string", str1.rjust(15)) - self.assertEquals(" fake string", str1.rjust(16)) - self.assertEquals("qqqqfake string", 
str1.rjust(15, "q")) + self.assertEqual(" fake string", str1.rjust(15)) + self.assertEqual(" fake string", str1.rjust(16)) + self.assertEqual("qqqqfake string", str1.rjust(15, "q")) - self.assertEquals(("fa", "ke", " string"), str1.rpartition("ke")) - self.assertEquals(("", "", "fake string"), str1.rpartition("asdf")) + self.assertEqual(("fa", "ke", " string"), str1.rpartition("ke")) + self.assertEqual(("", "", "fake string"), str1.rpartition("asdf")) str25 = _FakeString(" this is a sentence with whitespace ") actual = ["this", "is", "a", "sentence", "with", "whitespace"] - self.assertEquals(actual, str25.rsplit()) - self.assertEquals(actual, str25.rsplit(None)) + self.assertEqual(actual, str25.rsplit()) + self.assertEqual(actual, str25.rsplit(None)) actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with", "", "whitespace", ""] - self.assertEquals(actual, str25.rsplit(" ")) + self.assertEqual(actual, str25.rsplit(" ")) actual = [" this is a", "sentence", "with", "whitespace"] - self.assertEquals(actual, str25.rsplit(None, 3)) + self.assertEqual(actual, str25.rsplit(None, 3)) actual = [" this is a sentence with", "", "whitespace", ""] - self.assertEquals(actual, str25.rsplit(" ", 3)) + self.assertEqual(actual, str25.rsplit(" ", 3)) if py3k: - self.assertEquals(actual, str25.rsplit(maxsplit=3)) + self.assertEqual(actual, str25.rsplit(maxsplit=3)) - self.assertEquals("fake string", str1.rstrip()) - self.assertEquals(" fake string", str23.rstrip()) - self.assertEquals("fake stri", str1.rstrip("ngr")) + self.assertEqual("fake string", str1.rstrip()) + self.assertEqual(" fake string", str23.rstrip()) + self.assertEqual("fake stri", str1.rstrip("ngr")) actual = ["this", "is", "a", "sentence", "with", "whitespace"] - self.assertEquals(actual, str25.split()) - self.assertEquals(actual, str25.split(None)) + self.assertEqual(actual, str25.split()) + self.assertEqual(actual, str25.split(None)) actual = ["", "", "", "this", "is", "a", "", "", "sentence", "with", "", 
"whitespace", ""] - self.assertEquals(actual, str25.split(" ")) + self.assertEqual(actual, str25.split(" ")) actual = ["this", "is", "a", "sentence with whitespace "] - self.assertEquals(actual, str25.split(None, 3)) + self.assertEqual(actual, str25.split(None, 3)) actual = ["", "", "", "this is a sentence with whitespace "] - self.assertEquals(actual, str25.split(" ", 3)) + self.assertEqual(actual, str25.split(" ", 3)) if py3k: - self.assertEquals(actual, str25.split(maxsplit=3)) + self.assertEqual(actual, str25.split(maxsplit=3)) str26 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere") - self.assertEquals(["lines", "of", "text", "are", "presented", "here"], + self.assertEqual(["lines", "of", "text", "are", "presented", "here"], str26.splitlines()) - self.assertEquals(["lines\n", "of\n", "text\r\n", "are\r\n", + self.assertEqual(["lines\n", "of\n", "text\r\n", "are\r\n", "presented\n", "here"], str26.splitlines(True)) self.assertTrue(str1.startswith("fake")) self.assertFalse(str1.startswith("faker")) - self.assertEquals("fake string", str1.strip()) - self.assertEquals("fake string", str23.strip()) - self.assertEquals("ke stri", str1.strip("abcdefngr")) + self.assertEqual("fake string", str1.strip()) + self.assertEqual("fake string", str23.strip()) + self.assertEqual("ke stri", str1.strip("abcdefngr")) - self.assertEquals("fOObAR", str16.swapcase()) + self.assertEqual("fOObAR", str16.swapcase()) - self.assertEquals("Fake String", str1.title()) + self.assertEqual("Fake String", str1.title()) if py3k: table1 = str.maketrans({97: "1", 101: "2", 105: "3", 111: "4", 117: "5"}) table2 = str.maketrans("aeiou", "12345") table3 = str.maketrans("aeiou", "12345", "rts") - self.assertEquals("f1k2 str3ng", str1.translate(table1)) - self.assertEquals("f1k2 str3ng", str1.translate(table2)) - self.assertEquals("f1k2 3ng", str1.translate(table3)) + self.assertEqual("f1k2 str3ng", str1.translate(table1)) + self.assertEqual("f1k2 str3ng", str1.translate(table2)) + 
self.assertEqual("f1k2 3ng", str1.translate(table3)) else: table = {97: "1", 101: "2", 105: "3", 111: "4", 117: "5"} - self.assertEquals("f1k2 str3ng", str1.translate(table)) + self.assertEqual("f1k2 str3ng", str1.translate(table)) - self.assertEquals("", str15.upper()) - self.assertEquals("FOOBAR", str16.upper()) + self.assertEqual("", str15.upper()) + self.assertEqual("FOOBAR", str16.upper()) - self.assertEquals("123", str12.zfill(3)) - self.assertEquals("000123", str12.zfill(6)) + self.assertEqual("123", str12.zfill(3)) + self.assertEqual("000123", str12.zfill(6)) if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/tests/test_tokens.py b/tests/test_tokens.py index 5a18b8e..1449ad2 100644 --- a/tests/test_tokens.py +++ b/tests/test_tokens.py @@ -42,8 +42,8 @@ class TestTokens(unittest.TestCase): token1 = tokens.Token() token2 = tokens.Token(foo="bar", baz=123) - self.assertEquals("bar", token2.foo) - self.assertEquals(123, token2.baz) + self.assertEqual("bar", token2.foo) + self.assertEqual(123, token2.baz) self.assertRaises(KeyError, lambda: token1.foo) self.assertRaises(KeyError, lambda: token2.bar) @@ -51,8 +51,8 @@ class TestTokens(unittest.TestCase): token2.foo = "ham" del token2.baz - self.assertEquals("eggs", token1.spam) - self.assertEquals("ham", token2.foo) + self.assertEqual("eggs", token1.spam) + self.assertEqual("ham", token2.foo) self.assertRaises(KeyError, lambda: token2.baz) self.assertRaises(KeyError, delattr, token2, "baz") @@ -63,15 +63,15 @@ class TestTokens(unittest.TestCase): token3 = tokens.Text(text="earwig" * 100) hundredchars = ("earwig" * 100)[:97] + "..." 
- self.assertEquals("Token()", repr(token1)) + self.assertEqual("Token()", repr(token1)) if py3k: token2repr = "Token(foo='bar', baz=123)" token3repr = "Text(text='" + hundredchars + "')" else: token2repr = "Token(foo=u'bar', baz=123)" token3repr = "Text(text=u'" + hundredchars + "')" - self.assertEquals(token2repr, repr(token2)) - self.assertEquals(token3repr, repr(token3)) + self.assertEqual(token2repr, repr(token2)) + self.assertEqual(token3repr, repr(token3)) def test_equality(self): """check that equivalent tokens are considered equal""" @@ -82,10 +82,10 @@ class TestTokens(unittest.TestCase): token5 = tokens.Text(text="asdf") token6 = tokens.TemplateOpen(text="asdf") - self.assertEquals(token1, token2) - self.assertEquals(token2, token1) - self.assertEquals(token4, token5) - self.assertEquals(token5, token4) + self.assertEqual(token1, token2) + self.assertEqual(token2, token1) + self.assertEqual(token4, token5) + self.assertEqual(token5, token4) self.assertNotEquals(token1, token3) self.assertNotEquals(token2, token3) self.assertNotEquals(token4, token6) @@ -99,7 +99,7 @@ class TestTokens(unittest.TestCase): tokens.Text(text="earwig") ] for token in tests: - self.assertEquals(token, eval(repr(token), vars(tokens))) + self.assertEqual(token, eval(repr(token), vars(tokens))) if __name__ == "__main__": unittest.main(verbosity=2) From 97a837c1e8d8fbaae71360f442f53ca7bd81a58f Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Mar 2013 01:36:02 -0400 Subject: [PATCH 054/115] Implement test_parser(). Clean up a few lambdas in TestSmartList. 
--- tests/test_parser.py | 62 +++++++++++++++++++++++++++++++++++++++++++++++- tests/test_smart_list.py | 8 +++---- 2 files changed, 65 insertions(+), 5 deletions(-) diff --git a/tests/test_parser.py b/tests/test_parser.py index 5ea2b49..6e775ce 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -23,8 +23,68 @@ from __future__ import unicode_literals import unittest +from mwparserfromhell.compat import range +from mwparserfromhell.nodes import Template, Text, Wikilink +from mwparserfromhell.nodes.extras import Parameter +from mwparserfromhell.parser import Parser +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.wikicode import Wikicode + class TestParser(unittest.TestCase): - pass + """Tests for the Parser class itself, which tokenizes and builds nodes.""" + + def assertNodesEqual(self, expected, actual): + """Assert that two Nodes are the same type and have the same data.""" + self.assertIs(type(expected), type(actual)) + if isinstance(expected, Text): + self.assertEqual(expected.value, actual.value) + elif isinstance(expected, Template): + self.assertWikicodeEqual(expected.name, actual.name) + length = len(expected.params) + self.assertEqual(length, len(actual.params)) + for i in range(length): + exp_param = expected.params[i] + act_param = actual.params[i] + self.assertWikicodeEqual(exp_param.name, act_param.name) + self.assertWikicodeEqual(exp_param.value, act_param.value) + self.assertIs(exp_param.showkey, act_param.showkey) + elif isinstance(expected, Wikilink): + self.assertWikicodeEqual(expected.title, actual.title) + if expected.text is not None: + self.assertWikicodeEqual(expected.text, actual.text) + else: + self.assertIs(None, actual.text) + + def assertWikicodeEqual(self, expected, actual): + """Assert that two Wikicode objects have the same data.""" + self.assertIsInstance(actual, Wikicode) + length = len(expected.nodes) + self.assertEqual(length, len(actual.nodes)) + for i in range(length): + 
self.assertNodesEqual(expected.get(i), actual.get(i)) + + def test_parser(self): + """integration test for parsing overall""" + text = "this is text; {{this|is=a|template={{with|[[links]]|in}}it}}" + wrap = lambda L: Wikicode(SmartList(L)) + expected = wrap([ + Text("this is text; "), + Template(wrap([Text("this")]), [ + Parameter(wrap([Text("is")]), wrap([Text("a")])), + Parameter(wrap([Text("template")]), wrap([ + Template(wrap([Text("with")]), [ + Parameter(wrap([Text("1")]), + wrap([Wikilink(wrap([Text("links")]))]), + showkey=False), + Parameter(wrap([Text("2")]), + wrap([Text("in")]), showkey=False) + ]), + Text("it") + ])) + ]) + ]) + actual = Parser(text).parse() + self.assertWikicodeEqual(expected, actual) if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 680de9d..d821ccd 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -288,19 +288,19 @@ class TestSmartList(unittest.TestCase): def test_parent_get_set_del(self): """make sure SmartList's getitem/setitem/delitem work""" - self._test_get_set_del_item(lambda L: SmartList(L)) + self._test_get_set_del_item(SmartList) def test_parent_add(self): """make sure SmartList's add/radd/iadd work""" - self._test_add_radd_iadd(lambda L: SmartList(L)) + self._test_add_radd_iadd(SmartList) def test_parent_unaffected_magics(self): """sanity checks against SmartList features that were not modified""" - self._test_other_magic_methods(lambda L: SmartList(L)) + self._test_other_magic_methods(SmartList) def test_parent_methods(self): """make sure SmartList's non-magic methods work, like append()""" - self._test_list_methods(lambda L: SmartList(L)) + self._test_list_methods(SmartList) def test_child_get_set_del(self): """make sure _ListProxy's getitem/setitem/delitem work""" From f8032695146f032108c1b736631f546712689372 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Mar 2013 17:19:08 -0400 Subject: [PATCH 055/115] Add a 
USES_C field to the tokenizers; add TestParser.test_use_c() --- mwparserfromhell/parser/tokenizer.c | 2 ++ mwparserfromhell/parser/tokenizer.py | 1 + tests/test_parser.py | 13 ++++++++++--- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 8c96500..d3abb22 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -1387,6 +1387,8 @@ init_tokenizer(void) module = Py_InitModule("_tokenizer", module_methods); Py_INCREF(&TokenizerType); PyModule_AddObject(module, "CTokenizer", (PyObject*) &TokenizerType); + Py_INCREF(Py_True); + PyDict_SetItemString(TokenizerType.tp_dict, "USES_C", Py_True); tempmod = PyImport_ImportModule("htmlentitydefs"); if (!tempmod) diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index 67638ca..0bf0322 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -38,6 +38,7 @@ class BadRoute(Exception): class Tokenizer(object): """Creates a list of tokens from a string of wikicode.""" + USES_C = False START = object() END = object() MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":", diff --git a/tests/test_parser.py b/tests/test_parser.py index 6e775ce..4f718c8 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -23,10 +23,10 @@ from __future__ import unicode_literals import unittest +from mwparserfromhell import parser from mwparserfromhell.compat import range from mwparserfromhell.nodes import Template, Text, Wikilink from mwparserfromhell.nodes.extras import Parameter -from mwparserfromhell.parser import Parser from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode @@ -63,7 +63,14 @@ class TestParser(unittest.TestCase): for i in range(length): self.assertNodesEqual(expected.get(i), actual.get(i)) - def test_parser(self): + def test_use_c(self): + """make sure the 
correct tokenizer is used""" + if parser.use_c: + self.assertTrue(parser.Parser(None)._tokenizer.USES_C) + parser.use_c = False + self.assertFalse(parser.Parser(None)._tokenizer.USES_C) + + def test_parsing(self): """integration test for parsing overall""" text = "this is text; {{this|is=a|template={{with|[[links]]|in}}it}}" wrap = lambda L: Wikicode(SmartList(L)) @@ -83,7 +90,7 @@ class TestParser(unittest.TestCase): ])) ]) ]) - actual = Parser(text).parse() + actual = parser.Parser(text).parse() self.assertWikicodeEqual(expected, actual) if __name__ == "__main__": From 27a3503aa113c12971fab6a1d8fd676180b70449 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Mar 2013 17:22:37 -0400 Subject: [PATCH 056/115] Add test_uses_c() to TestPyTokenizer and TestCTokenizer --- tests/test_ctokenizer.py | 8 +++++++- tests/test_pytokenizer.py | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index 4dbeceb..7ef8975 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -23,6 +23,8 @@ from __future__ import unicode_literals import unittest +from mwparserfromhell.parser._tokenizer import CTokenizer + from _test_tokenizer import TokenizerTestCase class TestCTokenizer(TokenizerTestCase, unittest.TestCase): @@ -30,8 +32,12 @@ class TestCTokenizer(TokenizerTestCase, unittest.TestCase): @classmethod def setUpClass(cls): - from mwparserfromhell.parser._tokenizer import CTokenizer cls.tokenizer = CTokenizer + def test_uses_c(self): + """make sure the C tokenizer identifies as using a C extension""" + self.assertTrue(CTokenizer.USES_C) + self.assertTrue(CTokenizer().USES_C) + if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index 73e6fe7..3e598bf 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -23,6 +23,8 @@ from __future__ import unicode_literals import unittest +from 
mwparserfromhell.parser.tokenizer import Tokenizer + from _test_tokenizer import TokenizerTestCase class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): @@ -30,8 +32,12 @@ class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): @classmethod def setUpClass(cls): - from mwparserfromhell.parser.tokenizer import Tokenizer cls.tokenizer = Tokenizer + def test_uses_c(self): + """make sure the Python tokenizer identifies as not using C""" + self.assertFalse(Tokenizer.USES_C) + self.assertFalse(Tokenizer().USES_C) + if __name__ == "__main__": unittest.main(verbosity=2) From 5ca6f6c755bb8b3d3a3190bab4cf6f0a1eb6b2a7 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Mar 2013 17:40:39 -0400 Subject: [PATCH 057/115] Skip test_readme_5() if web query fails. --- tests/test_docs.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_docs.py b/tests/test_docs.py index 075b0a7..971c5d1 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -113,7 +113,10 @@ class TestDocs(unittest.TestCase): title = "Test" data = {"action": "query", "prop": "revisions", "rvlimit": 1, "rvprop": "content", "format": "json", "titles": title} - raw = urllib.urlopen(url1, urllib.urlencode(data)).read() + try: + raw = urllib.urlopen(url1, urllib.urlencode(data)).read() + except IOError: + self.skipTest("cannot continue because of unsuccessful web call") res = json.loads(raw) text = res["query"]["pages"].values()[0]["revisions"][0]["*"] expected = urllib.urlopen(url2.format(title)).read().decode("utf8") From 7f87a1c4b371f813d5006b25cf39f2b40b4dc58e Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Mar 2013 19:39:12 -0400 Subject: [PATCH 058/115] Apply bugfixes so that some tests pass on Python 3. - Skip CTokenizer tests if CTokenizer is not available. - TestStringMixin: Don't make assumptions about default encoding. - Add urllib stuff to mwparserfromhell.compat. - Fix compat.py's line endings. 
- gen.next() -> next(gen) - assert*Equals() -> assert*Equal() --- mwparserfromhell/compat.py | 69 +++++++++++++++++++++------------------- mwparserfromhell/string_mixin.py | 2 +- tests/test_ctokenizer.py | 6 +++- tests/test_docs.py | 14 ++++---- tests/test_smart_list.py | 6 ++-- tests/test_string_mixin.py | 33 ++++++++++++------- tests/test_tokens.py | 17 ++++++---- 7 files changed, 85 insertions(+), 62 deletions(-) diff --git a/mwparserfromhell/compat.py b/mwparserfromhell/compat.py index 48b9807..34870e6 100755 --- a/mwparserfromhell/compat.py +++ b/mwparserfromhell/compat.py @@ -1,33 +1,36 @@ -# -*- coding: utf-8 -*- - -""" -Implements support for both Python 2 and Python 3 by defining common types in -terms of their Python 2/3 variants. For example, :py:class:`str` is set to -:py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise, -:py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These -types are meant to be imported directly from within the parser's modules. -""" - -import sys - -py3k = sys.version_info[0] == 3 - -if py3k: - bytes = bytes - str = str - basestring = str - range = range - maxsize = sys.maxsize - import html.entities as htmlentities - from io import StringIO - -else: - bytes = str - str = unicode - basestring = basestring - range = xrange - maxsize = sys.maxint - import htmlentitydefs as htmlentities - from StringIO import StringIO - -del sys +# -*- coding: utf-8 -*- + +""" +Implements support for both Python 2 and Python 3 by defining common types in +terms of their Python 2/3 variants. For example, :py:class:`str` is set to +:py:class:`unicode` on Python 2 but :py:class:`str` on Python 3; likewise, +:py:class:`bytes` is :py:class:`str` on 2 but :py:class:`bytes` on 3. These +types are meant to be imported directly from within the parser's modules. 
+""" + +import sys + +py3k = sys.version_info[0] == 3 + +if py3k: + bytes = bytes + str = str + basestring = str + range = range + maxsize = sys.maxsize + import html.entities as htmlentities + from io import StringIO + from urllib.parse import urlencode + from urllib.request import urlopen + +else: + bytes = str + str = unicode + basestring = basestring + range = xrange + maxsize = sys.maxint + import htmlentitydefs as htmlentities + from StringIO import StringIO + from urllib import urlencode, urlopen + +del sys diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index eee58b9..6bee9c4 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -252,8 +252,8 @@ class StringMixIn(object): return self.__unicode__().lstrip(chars) if py3k: - @inheritdoc @staticmethod + @inheritdoc def maketrans(self, x, y=None, z=None): if z is None: if y is None: diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index 7ef8975..f21378c 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -23,10 +23,14 @@ from __future__ import unicode_literals import unittest -from mwparserfromhell.parser._tokenizer import CTokenizer +try: + from mwparserfromhell.parser._tokenizer import CTokenizer +except ImportError: + CTokenizer = None from _test_tokenizer import TokenizerTestCase +@unittest.skipUnless(CTokenizer, "C tokenizer not available") class TestCTokenizer(TokenizerTestCase, unittest.TestCase): """Test cases for the C tokenizer.""" diff --git a/tests/test_docs.py b/tests/test_docs.py index 971c5d1..3b23bb7 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -23,10 +23,9 @@ from __future__ import print_function, unicode_literals import json import unittest -import urllib import mwparserfromhell -from mwparserfromhell.compat import py3k, str, StringIO +from mwparserfromhell.compat import py3k, str, StringIO, urlencode, urlopen class TestDocs(unittest.TestCase): """Integration test cases for 
mwparserfromhell's documentation.""" @@ -114,12 +113,15 @@ class TestDocs(unittest.TestCase): data = {"action": "query", "prop": "revisions", "rvlimit": 1, "rvprop": "content", "format": "json", "titles": title} try: - raw = urllib.urlopen(url1, urllib.urlencode(data)).read() + raw = urlopen(url1, urlencode(data).encode("utf8")).read() + except IOError: + self.skipTest("cannot continue because of unsuccessful web call") + res = json.loads(raw.decode("utf8")) + text = list(res["query"]["pages"].values())[0]["revisions"][0]["*"] + try: + expected = urlopen(url2.format(title)).read().decode("utf8") except IOError: self.skipTest("cannot continue because of unsuccessful web call") - res = json.loads(raw) - text = res["query"]["pages"].values()[0]["revisions"][0]["*"] - expected = urllib.urlopen(url2.format(title)).read().decode("utf8") actual = mwparserfromhell.parse(text) self.assertEqual(expected, actual) diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index d821ccd..01caca7 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -180,11 +180,11 @@ class TestSmartList(unittest.TestCase): gen1 = iter(list1) out = [] for i in range(len(list1)): - out.append(gen1.next()) - self.assertRaises(StopIteration, gen1.next) + out.append(next(gen1)) + self.assertRaises(StopIteration, next, gen1) self.assertEqual([0, 1, 2, 3, "one", "two"], out) gen2 = iter(list2) - self.assertRaises(StopIteration, gen2.next) + self.assertRaises(StopIteration, next, gen2) self.assertEqual(["two", "one", 3, 2, 1, 0], list(reversed(list1))) self.assertEqual([], list(reversed(list2))) diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 6ef6344..6d10609 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -21,6 +21,7 @@ # SOFTWARE. 
from __future__ import unicode_literals +from sys import getdefaultencoding from types import GeneratorType import unittest @@ -139,10 +140,10 @@ class TestStringMixIn(unittest.TestCase): out = [] for i in range(len(str1)): - out.append(gen1.next()) - self.assertRaises(StopIteration, gen1.next) + out.append(next(gen1)) + self.assertRaises(StopIteration, next, gen1) self.assertEqual(expected, out) - self.assertRaises(StopIteration, gen2.next) + self.assertRaises(StopIteration, next, gen2) self.assertEqual("gnirts ekaf", "".join(list(reversed(str1)))) self.assertEqual([], list(reversed(str2))) @@ -187,17 +188,25 @@ class TestStringMixIn(unittest.TestCase): self.assertEqual("", str2.decode("punycode", "ignore")) str3 = _FakeString("𐌲𐌿𐍄") + actual = b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84" self.assertEqual(b"fake string", str1.encode()) - self.assertEqual(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", - str3.encode("utf8")) - self.assertEqual(b"\xF0\x90\x8C\xB2\xF0\x90\x8C\xBF\xF0\x90\x8D\x84", - str3.encode(encoding="utf8")) - self.assertRaises(UnicodeEncodeError, str3.encode) + self.assertEqual(actual, str3.encode("utf-8")) + self.assertEqual(actual, str3.encode(encoding="utf-8")) + if getdefaultencoding() == "ascii": + self.assertRaises(UnicodeEncodeError, str3.encode) + elif getdefaultencoding() == "utf-8": + self.assertEqual(actual, str3.encode()) self.assertRaises(UnicodeEncodeError, str3.encode, "ascii") self.assertRaises(UnicodeEncodeError, str3.encode, "ascii", "strict") - self.assertRaises(UnicodeEncodeError, str3.encode, errors="strict") - self.assertEqual("", str3.encode("ascii", "ignore")) - self.assertEqual("", str3.encode(errors="ignore")) + if getdefaultencoding() == "ascii": + self.assertRaises(UnicodeEncodeError, str3.encode, errors="strict") + elif getdefaultencoding() == "utf-8": + self.assertEqual(actual, str3.encode(errors="strict")) + self.assertEqual(b"", str3.encode("ascii", "ignore")) + if getdefaultencoding() == "ascii": + 
self.assertEqual(b"", str3.encode(errors="ignore")) + elif getdefaultencoding() == "utf-8": + self.assertEqual(actual, str3.encode(errors="ignore")) self.assertTrue(str1.endswith("ing")) self.assertFalse(str1.endswith("ingh")) @@ -364,6 +373,7 @@ class TestStringMixIn(unittest.TestCase): actual = [" this is a sentence with", "", "whitespace", ""] self.assertEqual(actual, str25.rsplit(" ", 3)) if py3k: + actual = [" this is a", "sentence", "with", "whitespace"] self.assertEqual(actual, str25.rsplit(maxsplit=3)) self.assertEqual("fake string", str1.rstrip()) @@ -381,6 +391,7 @@ class TestStringMixIn(unittest.TestCase): actual = ["", "", "", "this is a sentence with whitespace "] self.assertEqual(actual, str25.split(" ", 3)) if py3k: + actual = ["this", "is", "a", "sentence with whitespace "] self.assertEqual(actual, str25.split(maxsplit=3)) str26 = _FakeString("lines\nof\ntext\r\nare\r\npresented\nhere") diff --git a/tests/test_tokens.py b/tests/test_tokens.py index 1449ad2..4620982 100644 --- a/tests/test_tokens.py +++ b/tests/test_tokens.py @@ -65,12 +65,15 @@ class TestTokens(unittest.TestCase): self.assertEqual("Token()", repr(token1)) if py3k: - token2repr = "Token(foo='bar', baz=123)" + token2repr1 = "Token(foo='bar', baz=123)" + token2repr2 = "Token(baz=123, foo='bar')" token3repr = "Text(text='" + hundredchars + "')" else: - token2repr = "Token(foo=u'bar', baz=123)" + token2repr1 = "Token(foo=u'bar', baz=123)" + token2repr2 = "Token(baz=123, foo=u'bar')" token3repr = "Text(text=u'" + hundredchars + "')" - self.assertEqual(token2repr, repr(token2)) + token2repr = repr(token2) + self.assertTrue(token2repr == token2repr1 or token2repr == token2repr2) self.assertEqual(token3repr, repr(token3)) def test_equality(self): @@ -86,10 +89,10 @@ class TestTokens(unittest.TestCase): self.assertEqual(token2, token1) self.assertEqual(token4, token5) self.assertEqual(token5, token4) - self.assertNotEquals(token1, token3) - self.assertNotEquals(token2, token3) - 
self.assertNotEquals(token4, token6) - self.assertNotEquals(token5, token6) + self.assertNotEqual(token1, token3) + self.assertNotEqual(token2, token3) + self.assertNotEqual(token4, token6) + self.assertNotEqual(token5, token6) def test_repr_equality(self): "check that eval(repr(token)) == token" From 32ac6958e1618e9025486212dac412346126bccd Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Mar 2013 20:59:23 -0400 Subject: [PATCH 059/115] Apply some bugfixes to SmartList to fix tests on Python 3. - Add a _SliceNormalizerMixIn to properly handle slices. - Use floor division when applying key.step. - Implement sort() without 'cmp' parameter. - Fix bytes(list) behavior. - Children of _ListProxies are now _ListProxies, not regular lists. --- mwparserfromhell/smart_list.py | 137 +++++++++++++++++++++++++++-------------- tests/test_smart_list.py | 12 ++-- 2 files changed, 99 insertions(+), 50 deletions(-) diff --git a/mwparserfromhell/smart_list.py b/mwparserfromhell/smart_list.py index 46c475a..09b7bbb 100644 --- a/mwparserfromhell/smart_list.py +++ b/mwparserfromhell/smart_list.py @@ -41,8 +41,23 @@ def inheritdoc(method): method.__doc__ = getattr(list, method.__name__).__doc__ return method +class _SliceNormalizerMixIn(object): + """MixIn that provides a private method to normalize slices.""" -class SmartList(list): + def _normalize_slice(self, key): + """Return a slice equivalent to the input *key*, standardized.""" + if key.start is not None: + start = (len(self) + key.start) if key.start < 0 else key.start + else: + start = 0 + if key.stop is not None: + stop = (len(self) + key.stop) if key.stop < 0 else key.stop + else: + stop = maxsize + return slice(start, stop, key.step or 1) + + +class SmartList(_SliceNormalizerMixIn, list): """Implements the ``list`` interface with special handling of sublists. 
When a sublist is created (by ``list[i:j]``), any changes made to this @@ -76,8 +91,8 @@ class SmartList(list): def __getitem__(self, key): if not isinstance(key, slice): return super(SmartList, self).__getitem__(key) - keystop = maxsize if key.stop is None else key.stop - sliceinfo = [key.start or 0, keystop, key.step or 1] + key = self._normalize_slice(key) + sliceinfo = [key.start, key.stop, key.step] child = _ListProxy(self, sliceinfo) self._children[id(child)] = (child, sliceinfo) return child @@ -87,9 +102,8 @@ class SmartList(list): return super(SmartList, self).__setitem__(key, item) item = list(item) super(SmartList, self).__setitem__(key, item) - keystop = maxsize if key.stop is None else key.stop - key = slice(key.start or 0, keystop, key.step or 1) - diff = len(item) + (key.start - key.stop) / key.step + key = self._normalize_slice(key) + diff = len(item) + (key.start - key.stop) // key.step values = self._children.values if py3k else self._children.itervalues if diff: for child, (start, stop, step) in values(): @@ -101,11 +115,10 @@ class SmartList(list): def __delitem__(self, key): super(SmartList, self).__delitem__(key) if isinstance(key, slice): - keystop = maxsize if key.stop is None else key.stop - key = slice(key.start or 0, keystop, key.step or 1) + key = self._normalize_slice(key) else: key = slice(key, key + 1, 1) - diff = (key.stop - key.start) / key.step + diff = (key.stop - key.start) // key.step values = self._children.values if py3k else self._children.itervalues for child, (start, stop, step) in values(): if start > key.start: @@ -166,22 +179,35 @@ class SmartList(list): child._parent = copy super(SmartList, self).reverse() - @inheritdoc - def sort(self, cmp=None, key=None, reverse=None): - copy = list(self) - for child in self._children: - child._parent = copy - kwargs = {} - if cmp is not None: - kwargs["cmp"] = cmp - if key is not None: - kwargs["key"] = key - if reverse is not None: - kwargs["reverse"] = reverse - super(SmartList, 
self).sort(**kwargs) - - -class _ListProxy(list): + if py3k: + @inheritdoc + def sort(self, key=None, reverse=None): + copy = list(self) + for child in self._children: + child._parent = copy + kwargs = {} + if key is not None: + kwargs["key"] = key + if reverse is not None: + kwargs["reverse"] = reverse + super(SmartList, self).sort(**kwargs) + else: + @inheritdoc + def sort(self, cmp=None, key=None, reverse=None): + copy = list(self) + for child in self._children: + child._parent = copy + kwargs = {} + if cmp is not None: + kwargs["cmp"] = cmp + if key is not None: + kwargs["key"] = key + if reverse is not None: + kwargs["reverse"] = reverse + super(SmartList, self).sort(**kwargs) + + +class _ListProxy(_SliceNormalizerMixIn, list): """Implement the ``list`` interface by getting elements from a parent. This is created by a :py:class:`~.SmartList` object when slicing. It does @@ -235,19 +261,28 @@ class _ListProxy(list): return bool(self._render()) def __len__(self): - return (self._stop - self._start) / self._step + return (self._stop - self._start) // self._step def __getitem__(self, key): - return self._render()[key] + if isinstance(key, slice): + key = self._normalize_slice(key) + if key.stop == maxsize: + keystop = self._stop + else: + keystop = key.stop + self._start + adjusted = slice(key.start + self._start, keystop, key.step) + return self._parent[adjusted] + else: + return self._render()[key] def __setitem__(self, key, item): if isinstance(key, slice): - keystart = (key.start or 0) + self._start - if key.stop is None or key.stop == maxsize: + key = self._normalize_slice(key) + if key.stop == maxsize: keystop = self._stop else: keystop = key.stop + self._start - adjusted = slice(keystart, keystop, key.step) + adjusted = slice(key.start + self._start, keystop, key.step) self._parent[adjusted] = item else: length = len(self) @@ -259,12 +294,12 @@ class _ListProxy(list): def __delitem__(self, key): if isinstance(key, slice): - keystart = (key.start or 0) + 
self._start - if key.stop is None or key.stop == maxsize: + key = self._normalize_slice(key) + if key.stop == maxsize: keystop = self._stop else: keystop = key.stop + self._start - adjusted = slice(keystart, keystop, key.step) + adjusted = slice(key.start + self._start, keystop, key.step) del self._parent[adjusted] else: length = len(self) @@ -388,18 +423,30 @@ class _ListProxy(list): item.reverse() self._parent[self._start:self._stop:self._step] = item - @inheritdoc - def sort(self, cmp=None, key=None, reverse=None): - item = self._render() - kwargs = {} - if cmp is not None: - kwargs["cmp"] = cmp - if key is not None: - kwargs["key"] = key - if reverse is not None: - kwargs["reverse"] = reverse - item.sort(**kwargs) - self._parent[self._start:self._stop:self._step] = item + if py3k: + @inheritdoc + def sort(self, key=None, reverse=None): + item = self._render() + kwargs = {} + if key is not None: + kwargs["key"] = key + if reverse is not None: + kwargs["reverse"] = reverse + item.sort(**kwargs) + self._parent[self._start:self._stop:self._step] = item + else: + @inheritdoc + def sort(self, cmp=None, key=None, reverse=None): + item = self._render() + kwargs = {} + if cmp is not None: + kwargs["cmp"] = cmp + if key is not None: + kwargs["key"] = key + if reverse is not None: + kwargs["reverse"] = reverse + item.sort(**kwargs) + self._parent[self._start:self._stop:self._step] = item del inheritdoc diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 01caca7..3423bb7 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -123,7 +123,7 @@ class TestSmartList(unittest.TestCase): if py3k: self.assertEqual("[0, 1, 2, 3, 'one', 'two']", str(list1)) - self.assertEqual(b"[0, 1, 2, 3, 'one', 'two']", bytes(list1)) + self.assertEqual(b"\x00\x01\x02", bytes(list4)) self.assertEqual("[0, 1, 2, 3, 'one', 'two']", repr(list1)) else: self.assertEqual("[0, 1, 2, 3, u'one', u'two']", unicode(list1)) @@ -256,10 +256,12 @@ class 
TestSmartList(unittest.TestCase): self.assertEqual([0, 2, 2, 3, 4, 5], list1) list1.sort(reverse=True) self.assertEqual([5, 4, 3, 2, 2, 0], list1) - list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - y)) # Distance from 3 - self.assertEqual([3, 4, 2, 2, 5, 0], list1) - list1.sort(cmp=lambda x, y: abs(3 - x) - abs(3 - y), reverse=True) - self.assertEqual([0, 5, 4, 2, 2, 3], list1) + if not py3k: + func = lambda x, y: abs(3 - x) - abs(3 - y) # Distance from 3 + list1.sort(cmp=func) + self.assertEqual([3, 4, 2, 2, 5, 0], list1) + list1.sort(cmp=func, reverse=True) + self.assertEqual([0, 5, 4, 2, 2, 3], list1) list3.sort(key=lambda i: i[1]) self.assertEqual([("d", 2), ("c", 3), ("a", 5), ("b", 8)], list3) list3.sort(key=lambda i: i[1], reverse=True) From eae6f11add071401c95e89c5f8ea42be2d0c96aa Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 27 Mar 2013 21:24:45 -0400 Subject: [PATCH 060/115] Make _test_tokenizer import relative so tests work on py3k. --- tests/test_ctokenizer.py | 2 +- tests/test_pytokenizer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index f21378c..7a082e8 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -28,7 +28,7 @@ try: except ImportError: CTokenizer = None -from _test_tokenizer import TokenizerTestCase +from ._test_tokenizer import TokenizerTestCase @unittest.skipUnless(CTokenizer, "C tokenizer not available") class TestCTokenizer(TokenizerTestCase, unittest.TestCase): diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index 3e598bf..697c7e5 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -25,7 +25,7 @@ import unittest from mwparserfromhell.parser.tokenizer import Tokenizer -from _test_tokenizer import TokenizerTestCase +from ._test_tokenizer import TokenizerTestCase class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): """Test cases for the Python tokenizer.""" From 
1b69b5e882944abf0909816d2daed76c37cbe9c8 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 30 Mar 2013 16:46:39 -0400 Subject: [PATCH 061/115] Moving compat stuff exclusively for unit tests to its own file. --- mwparserfromhell/compat.py | 7 ------- tests/compat.py | 20 ++++++++++++++++++++ tests/test_docs.py | 4 +++- tests/test_parser.py | 3 ++- tests/test_smart_list.py | 4 +++- tests/test_string_mixin.py | 4 +++- 6 files changed, 31 insertions(+), 11 deletions(-) create mode 100644 tests/compat.py diff --git a/mwparserfromhell/compat.py b/mwparserfromhell/compat.py index 34870e6..bb81513 100755 --- a/mwparserfromhell/compat.py +++ b/mwparserfromhell/compat.py @@ -16,21 +16,14 @@ if py3k: bytes = bytes str = str basestring = str - range = range maxsize = sys.maxsize import html.entities as htmlentities - from io import StringIO - from urllib.parse import urlencode - from urllib.request import urlopen else: bytes = str str = unicode basestring = basestring - range = xrange maxsize = sys.maxint import htmlentitydefs as htmlentities - from StringIO import StringIO - from urllib import urlencode, urlopen del sys diff --git a/tests/compat.py b/tests/compat.py new file mode 100644 index 0000000..8bed40e --- /dev/null +++ b/tests/compat.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- + +""" +Serves the same purpose as mwparserfromhell.compat, but only for objects +required by unit tests. This avoids unnecessary imports (like urllib) within +the main library. 
+""" + +from mwparserfromhell.compat import py3k + +if py3k: + range = range + from io import StringIO + from urllib.parse import urlencode + from urllib.request import urlopen + +else: + range = xrange + from StringIO import StringIO + from urllib import urlencode, urlopen diff --git a/tests/test_docs.py b/tests/test_docs.py index 3b23bb7..8d95c47 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -25,7 +25,9 @@ import json import unittest import mwparserfromhell -from mwparserfromhell.compat import py3k, str, StringIO, urlencode, urlopen +from mwparserfromhell.compat import py3k, str + +from .compat import StringIO, urlencode, urlopen class TestDocs(unittest.TestCase): """Integration test cases for mwparserfromhell's documentation.""" diff --git a/tests/test_parser.py b/tests/test_parser.py index 4f718c8..1c37a85 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -24,12 +24,13 @@ from __future__ import unicode_literals import unittest from mwparserfromhell import parser -from mwparserfromhell.compat import range from mwparserfromhell.nodes import Template, Text, Wikilink from mwparserfromhell.nodes.extras import Parameter from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode +from .compat import range + class TestParser(unittest.TestCase): """Tests for the Parser class itself, which tokenizes and builds nodes.""" diff --git a/tests/test_smart_list.py b/tests/test_smart_list.py index 3423bb7..25df555 100644 --- a/tests/test_smart_list.py +++ b/tests/test_smart_list.py @@ -23,9 +23,11 @@ from __future__ import unicode_literals import unittest -from mwparserfromhell.compat import py3k, range +from mwparserfromhell.compat import py3k from mwparserfromhell.smart_list import SmartList, _ListProxy +from .compat import range + class TestSmartList(unittest.TestCase): """Test cases for the SmartList class and its child, _ListProxy.""" diff --git a/tests/test_string_mixin.py b/tests/test_string_mixin.py index 
6d10609..306f2fd 100644 --- a/tests/test_string_mixin.py +++ b/tests/test_string_mixin.py @@ -25,9 +25,11 @@ from sys import getdefaultencoding from types import GeneratorType import unittest -from mwparserfromhell.compat import bytes, py3k, range, str +from mwparserfromhell.compat import bytes, py3k, str from mwparserfromhell.string_mixin import StringMixIn +from .compat import range + class _FakeString(StringMixIn): def __init__(self, data): self._data = data From e3f89af62dcc323b6119174a07868057e814ede9 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 30 Mar 2013 18:38:29 -0400 Subject: [PATCH 062/115] Adding a TreeEqualityTestCase base class. --- tests/_test_tokenizer.py | 3 +- tests/_test_tree_equality.py | 78 ++++++++++++++++++++++++++++++++++++++++++++ tests/test_ctokenizer.py | 2 +- tests/test_parser.py | 33 ++----------------- tests/test_pytokenizer.py | 2 +- 5 files changed, 84 insertions(+), 34 deletions(-) create mode 100644 tests/_test_tree_equality.py diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 379b4fa..13882aa 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -21,6 +21,7 @@ # SOFTWARE. from __future__ import print_function, unicode_literals +from unittest import TestCase from os import listdir, path from mwparserfromhell.compat import py3k @@ -31,7 +32,7 @@ class _TestParseError(Exception): pass -class TokenizerTestCase(object): +class TokenizerTestCase(TestCase): """A base test case for tokenizers, whose tests are loaded dynamically. 
Subclassed along with unittest.TestCase to form TestPyTokenizer and diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py new file mode 100644 index 0000000..26c373d --- /dev/null +++ b/tests/_test_tree_equality.py @@ -0,0 +1,78 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import unicode_literals +from unittest import TestCase + +from mwparserfromhell.nodes import Template, Text, Wikilink +from mwparserfromhell.nodes.extras import Parameter +from mwparserfromhell.wikicode import Wikicode + +class TreeEqualityTestCase(TestCase): + """A base test case with support for comparing the equality of node trees. + + This adds a number of type equality functions, for Wikicode, Text, + Templates, and Wikilinks. 
+ """ + + def assertNodeEqual(self, expected, actual): + registry = { + Text: self.assertTextNodeEqual, + Template: self.assertTemplateNodeEqual, + Wikilink: self.assertWikilinkNodeEqual + } + for nodetype in registry: + if isinstance(expected, nodetype): + self.assertIsInstance(actual, nodetype) + registry[nodetype](expected, actual) + + def assertTextNodeEqual(self, expected, actual): + """Assert that two Text nodes have the same data.""" + self.assertEqual(expected.value, actual.value) + + def assertTemplateNodeEqual(self, expected, actual): + """Assert that two Template nodes have the same data.""" + self.assertWikicodeEqual(expected.name, actual.name) + length = len(expected.params) + self.assertEqual(length, len(actual.params)) + for i in range(length): + exp_param = expected.params[i] + act_param = actual.params[i] + self.assertWikicodeEqual(exp_param.name, act_param.name) + self.assertWikicodeEqual(exp_param.value, act_param.value) + self.assertIs(exp_param.showkey, act_param.showkey) + + def assertWikilinkNodeEqual(self, expected, actual): + """Assert that two Wikilink nodes have the same data.""" + self.assertWikicodeEqual(expected.title, actual.title) + if expected.text is not None: + self.assertWikicodeEqual(expected.text, actual.text) + else: + self.assertIs(None, actual.text) + + def assertWikicodeEqual(self, expected, actual): + """Assert that two Wikicode objects have the same data.""" + self.assertIsInstance(actual, Wikicode) + length = len(expected.nodes) + self.assertEqual(length, len(actual.nodes)) + for i in range(length): + self.assertNodeEqual(expected.get(i), actual.get(i)) diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index 7a082e8..955b9a0 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -31,7 +31,7 @@ except ImportError: from ._test_tokenizer import TokenizerTestCase @unittest.skipUnless(CTokenizer, "C tokenizer not available") -class TestCTokenizer(TokenizerTestCase, unittest.TestCase): +class 
TestCTokenizer(TokenizerTestCase): """Test cases for the C tokenizer.""" @classmethod diff --git a/tests/test_parser.py b/tests/test_parser.py index 1c37a85..9d2c969 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -29,41 +29,12 @@ from mwparserfromhell.nodes.extras import Parameter from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode +from ._test_tree_equality import TreeEqualityTestCase from .compat import range -class TestParser(unittest.TestCase): +class TestParser(TreeEqualityTestCase): """Tests for the Parser class itself, which tokenizes and builds nodes.""" - def assertNodesEqual(self, expected, actual): - """Assert that two Nodes are the same type and have the same data.""" - self.assertIs(type(expected), type(actual)) - if isinstance(expected, Text): - self.assertEqual(expected.value, actual.value) - elif isinstance(expected, Template): - self.assertWikicodeEqual(expected.name, actual.name) - length = len(expected.params) - self.assertEqual(length, len(actual.params)) - for i in range(length): - exp_param = expected.params[i] - act_param = actual.params[i] - self.assertWikicodeEqual(exp_param.name, act_param.name) - self.assertWikicodeEqual(exp_param.value, act_param.value) - self.assertIs(exp_param.showkey, act_param.showkey) - elif isinstance(expected, Wikilink): - self.assertWikicodeEqual(expected.title, actual.title) - if expected.text is not None: - self.assertWikicodeEqual(expected.text, actual.text) - else: - self.assertIs(None, actual.text) - - def assertWikicodeEqual(self, expected, actual): - """Assert that two Wikicode objects have the same data.""" - self.assertIsInstance(actual, Wikicode) - length = len(expected.nodes) - self.assertEqual(length, len(actual.nodes)) - for i in range(length): - self.assertNodesEqual(expected.get(i), actual.get(i)) - def test_use_c(self): """make sure the correct tokenizer is used""" if parser.use_c: diff --git a/tests/test_pytokenizer.py 
b/tests/test_pytokenizer.py index 697c7e5..7b37eb3 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -27,7 +27,7 @@ from mwparserfromhell.parser.tokenizer import Tokenizer from ._test_tokenizer import TokenizerTestCase -class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): +class TestPyTokenizer(TokenizerTestCase): """Test cases for the Python tokenizer.""" @classmethod From a8cb275b941b70524e8b97341784097434ae627c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 1 Apr 2013 19:04:55 -0400 Subject: [PATCH 063/115] Add TestUtils; implement two tests for it. Also, add a missing docstring in TreeEqualityTestCase. --- tests/_test_tree_equality.py | 1 + tests/test_utils.py | 67 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 tests/test_utils.py diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index 26c373d..0fdb531 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -35,6 +35,7 @@ class TreeEqualityTestCase(TestCase): """ def assertNodeEqual(self, expected, actual): + """Assert that two Nodes have the same type and have the same data.""" registry = { Text: self.assertTextNodeEqual, Template: self.assertTemplateNodeEqual, diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..8afad7a --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,67 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this 
permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.nodes import Template, Text +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.utils import parse_anything +from mwparserfromhell.wikicode import Wikicode + +from ._test_tree_equality import TreeEqualityTestCase + +class TestUtils(TreeEqualityTestCase): + """Tests for the utils module, which provides parse_anything().""" + + def test_parse_anything_valid(self): + """tests for valid input to utils.parse_anything()""" + wrap = lambda L: Wikicode(SmartList(L)) + textify = lambda L: wrap([Text(item) for item in L]) + tests = [ + (wrap([Text("foobar")]), textify(["foobar"])), + (Template(wrap([Text("spam")])), + wrap([Template(textify(["spam"]))])), + ("fóóbar", textify(["fóóbar"])), + (b"foobár", textify(["foobár"])), + (123, textify(["123"])), + (True, textify(["True"])), + (None, wrap([])), + ([Text("foo"), Text("bar"), Text("baz")], + textify(["foo", "bar", "baz"])), + ([wrap([Text("foo")]), Text("bar"), "baz", 123, 456], + textify(["foo", "bar", "baz", "123", "456"])), + ([[[([[((("foo",),),)], "bar"],)]]], textify(["foo", "bar"])) + ] + for test, valid in tests: + self.assertWikicodeEqual(valid, parse_anything(test)) + + def test_parse_anything_invalid(self): + """tests for invalid input to utils.parse_anything()""" + self.assertRaises(ValueError, parse_anything, Ellipsis) + 
self.assertRaises(ValueError, parse_anything, object) + self.assertRaises(ValueError, parse_anything, object()) + self.assertRaises(ValueError, parse_anything, type) + self.assertRaises(ValueError, parse_anything, ["foo", [object]]) + +if __name__ == "__main__": + unittest.main(verbosity=2) From 30d4f137a829a7bfd613363f3579f97337462024 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 1 Apr 2013 19:06:59 -0400 Subject: [PATCH 064/115] Curse you, Python 3! --- tests/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 8afad7a..c088530 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -42,7 +42,7 @@ class TestUtils(TreeEqualityTestCase): (Template(wrap([Text("spam")])), wrap([Template(textify(["spam"]))])), ("fóóbar", textify(["fóóbar"])), - (b"foobár", textify(["foobár"])), + (b"foob\xc3\xa1r", textify(["foobár"])), (123, textify(["123"])), (True, textify(["True"])), (None, wrap([])), From cda1ce95f3b46c3392e57de182bc925c815b7d1f Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 1 Apr 2013 19:11:30 -0400 Subject: [PATCH 065/115] Roll back part of e3f89af62d because CURSE YOU UNIT TESTING FRAMEWORK --- tests/_test_tokenizer.py | 3 +-- tests/test_ctokenizer.py | 2 +- tests/test_pytokenizer.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/_test_tokenizer.py b/tests/_test_tokenizer.py index 13882aa..379b4fa 100644 --- a/tests/_test_tokenizer.py +++ b/tests/_test_tokenizer.py @@ -21,7 +21,6 @@ # SOFTWARE. from __future__ import print_function, unicode_literals -from unittest import TestCase from os import listdir, path from mwparserfromhell.compat import py3k @@ -32,7 +31,7 @@ class _TestParseError(Exception): pass -class TokenizerTestCase(TestCase): +class TokenizerTestCase(object): """A base test case for tokenizers, whose tests are loaded dynamically. 
Subclassed along with unittest.TestCase to form TestPyTokenizer and diff --git a/tests/test_ctokenizer.py b/tests/test_ctokenizer.py index 955b9a0..7a082e8 100644 --- a/tests/test_ctokenizer.py +++ b/tests/test_ctokenizer.py @@ -31,7 +31,7 @@ except ImportError: from ._test_tokenizer import TokenizerTestCase @unittest.skipUnless(CTokenizer, "C tokenizer not available") -class TestCTokenizer(TokenizerTestCase): +class TestCTokenizer(TokenizerTestCase, unittest.TestCase): """Test cases for the C tokenizer.""" @classmethod diff --git a/tests/test_pytokenizer.py b/tests/test_pytokenizer.py index 7b37eb3..697c7e5 100644 --- a/tests/test_pytokenizer.py +++ b/tests/test_pytokenizer.py @@ -27,7 +27,7 @@ from mwparserfromhell.parser.tokenizer import Tokenizer from ._test_tokenizer import TokenizerTestCase -class TestPyTokenizer(TokenizerTestCase): +class TestPyTokenizer(TokenizerTestCase, unittest.TestCase): """Test cases for the Python tokenizer.""" @classmethod From 892092434fa748ef06ff2558c5b9dbfce9155071 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 1 Apr 2013 21:04:53 -0400 Subject: [PATCH 066/115] Skeleton for TestBuilder; adding some nodes to TreeEqualityTestCase. 
--- tests/_test_tree_equality.py | 38 ++++++++++++++++++++++++++++++------ tests/test_builder.py | 46 ++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 76 insertions(+), 8 deletions(-) diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index 0fdb531..16f4b49 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -23,8 +23,9 @@ from __future__ import unicode_literals from unittest import TestCase -from mwparserfromhell.nodes import Template, Text, Wikilink -from mwparserfromhell.nodes.extras import Parameter +from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, + Tag, Template, Text, Wikilink) +from mwparserfromhell.nodes.extras import Attribute, Parameter from mwparserfromhell.wikicode import Wikicode class TreeEqualityTestCase(TestCase): @@ -37,8 +38,13 @@ class TreeEqualityTestCase(TestCase): def assertNodeEqual(self, expected, actual): """Assert that two Nodes have the same type and have the same data.""" registry = { - Text: self.assertTextNodeEqual, + Argument: self.assertArgumentNodeEqual, + Comment: self.assertCommentNodeEqual, + Heading: self.assertHeadingNodeEqual, + HTMLEntity: self.assertHTMLEntityNodeEqual, + Tag: self.assertTagNodeEqual, Template: self.assertTemplateNodeEqual, + Text: self.assertTextNodeEqual, Wikilink: self.assertWikilinkNodeEqual } for nodetype in registry: @@ -46,9 +52,25 @@ class TreeEqualityTestCase(TestCase): self.assertIsInstance(actual, nodetype) registry[nodetype](expected, actual) - def assertTextNodeEqual(self, expected, actual): - """Assert that two Text nodes have the same data.""" - self.assertEqual(expected.value, actual.value) + def assertArgumentNodeEqual(self, expected, actual): + """Assert that two Argument nodes have the same data.""" + pass + + def assertCommentNodeEqual(self, expected, actual): + """Assert that two Comment nodes have the same data.""" + pass + + def assertHeadingNodeEqual(self, expected, actual): + """Assert that two 
Heading nodes have the same data.""" + pass + + def assertHTMLEntityNodeEqual(self, expected, actual): + """Assert that two HTMLEntity nodes have the same data.""" + pass + + def assertTagNodeEqual(self, expected, actual): + """Assert that two Tag nodes have the same data.""" + pass def assertTemplateNodeEqual(self, expected, actual): """Assert that two Template nodes have the same data.""" @@ -62,6 +84,10 @@ class TreeEqualityTestCase(TestCase): self.assertWikicodeEqual(exp_param.value, act_param.value) self.assertIs(exp_param.showkey, act_param.showkey) + def assertTextNodeEqual(self, expected, actual): + """Assert that two Text nodes have the same data.""" + self.assertEqual(expected.value, actual.value) + def assertWikilinkNodeEqual(self, expected, actual): """Assert that two Wikilink nodes have the same data.""" self.assertWikicodeEqual(expected.title, actual.title) diff --git a/tests/test_builder.py b/tests/test_builder.py index a3518fd..a80d8bf 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -23,8 +23,50 @@ from __future__ import unicode_literals import unittest -class TestBuilder(unittest.TestCase): - pass +from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, + Tag, Template, Text, Wikilink) +from mwparserfromhell.nodes.extras import Attribute, Parameter +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.wikicode import Wikicode + +from ._test_tree_equality import TreeEqualityTestCase + +wrap = lambda L: Wikicode(SmartList(L)) + +class TestBuilder(TreeEqualityTestCase): + """Tests for the builder, which turns tokens into Wikicode objects.""" + + def test_text(self): + """tests for building Text nodes""" + pass + + def test_template(self): + """tests for building Template nodes""" + pass + + def test_argument(self): + """tests for building Argument nodes""" + pass + + def test_wikilink(self): + """tests for building Wikilink nodes""" + pass + + def test_html_entity(self): + """tests for building 
HTMLEntity nodes""" + pass + + def test_heading(self): + """tests for building Heading nodes""" + pass + + def test_comment(self): + """tests for building Comment nodes""" + pass + + def test_tag(self): + """tests for building Tag nodes""" + pass if __name__ == "__main__": unittest.main(verbosity=2) From 404b4479a26ab89f41b2e9bae5c6ffc8d5777f67 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 1 Apr 2013 21:30:19 -0400 Subject: [PATCH 067/115] Implement the remaining asserts in TreeEqualityTestCase. --- mwparserfromhell/nodes/html_entity.py | 5 ++++- tests/_test_tree_equality.py | 18 +++++++++++++----- tests/test_builder.py | 1 + 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/mwparserfromhell/nodes/html_entity.py b/mwparserfromhell/nodes/html_entity.py index 221040b..5b7607c 100644 --- a/mwparserfromhell/nodes/html_entity.py +++ b/mwparserfromhell/nodes/html_entity.py @@ -135,7 +135,10 @@ class HTMLEntity(Node): @hex_char.setter def hex_char(self, newval): - self._hex_char = bool(newval) + newval = str(newval) + if newval not in ("x", "X"): + raise ValueError(newval) + self._hex_char = newval def normalize(self): """Return the unicode character represented by the HTML entity.""" diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index 16f4b49..2014ac1 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -54,23 +54,31 @@ class TreeEqualityTestCase(TestCase): def assertArgumentNodeEqual(self, expected, actual): """Assert that two Argument nodes have the same data.""" - pass + self.assertWikicodeEqual(expected.name, actual.name) + if expected.default is not None: + self.assertWikicodeEqual(expected.default, actual.default) + else: + self.assertIs(None, actual.default) def assertCommentNodeEqual(self, expected, actual): """Assert that two Comment nodes have the same data.""" - pass + self.assertWikicodeEqual(expected.contents, actual.contents) def assertHeadingNodeEqual(self, expected, actual): 
"""Assert that two Heading nodes have the same data.""" - pass + self.assertWikicodeEqual(expected.title, actual.title) + self.assertEqual(expected.level, actual.level) def assertHTMLEntityNodeEqual(self, expected, actual): """Assert that two HTMLEntity nodes have the same data.""" - pass + self.assertEqual(expected.value, actual.value) + self.assertIs(expected.named, actual.named) + self.assertIs(expected.hexadecimal, actual.hexadecimal) + self.assertEquals(expected.hex_char, actual.hex_char) def assertTagNodeEqual(self, expected, actual): """Assert that two Tag nodes have the same data.""" - pass + self.fail("Holding this until feature/html_tags is ready.") def assertTemplateNodeEqual(self, expected, actual): """Assert that two Template nodes have the same data.""" diff --git a/tests/test_builder.py b/tests/test_builder.py index a80d8bf..e6919c1 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -64,6 +64,7 @@ class TestBuilder(TreeEqualityTestCase): """tests for building Comment nodes""" pass + @unittest.skip("holding this until feature/html_tags is ready") def test_tag(self): """tests for building Tag nodes""" pass From cb23587ab6e4cb3dfc21d817f2cb7b18c5542a60 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 3 Apr 2013 11:00:07 -0400 Subject: [PATCH 068/115] Adding some Builder tests --- tests/test_builder.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/tests/test_builder.py b/tests/test_builder.py index e6919c1..d577bfc 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -26,6 +26,8 @@ import unittest from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, Tag, Template, Text, Wikilink) from mwparserfromhell.nodes.extras import Attribute, Parameter +from mwparserfromhell.parser import tokens +from mwparserfromhell.parser.builder import Builder from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode @@ -36,13 +38,34 
@@ wrap = lambda L: Wikicode(SmartList(L)) class TestBuilder(TreeEqualityTestCase): """Tests for the builder, which turns tokens into Wikicode objects.""" + def setUp(self): + self.builder = Builder() + def test_text(self): """tests for building Text nodes""" - pass + tests = [ + ([tokens.Text(text="foobar")], wrap([Text("foobar")])), + ([tokens.Text(text="fóóbar")], wrap([Text("fóóbar")])), + ([tokens.Text(text="spam"), tokens.Text(text="eggs")], + wrap([Text("spam"), Text("eggs")])), + ] + for test, valid in tests: + self.assertWikicodeEqual(valid, self.builder.build(test)) def test_template(self): """tests for building Template nodes""" - pass + tests = [ + ([tokens.TemplateOpen(), tokens.Text(text="foobar"), tokens.TemplateClose()], + wrap([Template(wrap([Text("foobar")]))])), + ([tokens.TemplateOpen(), tokens.Text(text="spam"), tokens.Text(text="eggs"), tokens.TemplateClose()], + wrap([Template(wrap([Text("spam"), Text("eggs")]))])), + ([tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateParamSeparator(), tokens.Text(text="bar"), tokens.TemplateClose()], + wrap([Template(wrap([Text("foo")]), params=[Parameter(wrap([Text("1")]), wrap([Text("bar")]), showkey=False)])])), + ([tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateParamSeparator(), tokens.Text(text="bar"), tokens.TemplateParamEquals(), tokens.Text(text="baz"), tokens.TemplateClose()], + wrap([Template(wrap([Text("foo")]), params=[Parameter(wrap([Text("bar")]), wrap([Text("baz")]))])])), + ] + for test, valid in tests: + self.assertWikicodeEqual(valid, self.builder.build(test)) def test_argument(self): """tests for building Argument nodes""" From b8e8d057abc4fefec78f967adf30326669c0726c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 4 Apr 2013 10:49:04 -0400 Subject: [PATCH 069/115] Finish test_template() --- tests/test_builder.py | 50 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/tests/test_builder.py 
b/tests/test_builder.py index d577bfc..952b501 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -47,7 +47,7 @@ class TestBuilder(TreeEqualityTestCase): ([tokens.Text(text="foobar")], wrap([Text("foobar")])), ([tokens.Text(text="fóóbar")], wrap([Text("fóóbar")])), ([tokens.Text(text="spam"), tokens.Text(text="eggs")], - wrap([Text("spam"), Text("eggs")])), + wrap([Text("spam"), Text("eggs")])), ] for test, valid in tests: self.assertWikicodeEqual(valid, self.builder.build(test)) @@ -55,14 +55,46 @@ class TestBuilder(TreeEqualityTestCase): def test_template(self): """tests for building Template nodes""" tests = [ - ([tokens.TemplateOpen(), tokens.Text(text="foobar"), tokens.TemplateClose()], - wrap([Template(wrap([Text("foobar")]))])), - ([tokens.TemplateOpen(), tokens.Text(text="spam"), tokens.Text(text="eggs"), tokens.TemplateClose()], - wrap([Template(wrap([Text("spam"), Text("eggs")]))])), - ([tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateParamSeparator(), tokens.Text(text="bar"), tokens.TemplateClose()], - wrap([Template(wrap([Text("foo")]), params=[Parameter(wrap([Text("1")]), wrap([Text("bar")]), showkey=False)])])), - ([tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateParamSeparator(), tokens.Text(text="bar"), tokens.TemplateParamEquals(), tokens.Text(text="baz"), tokens.TemplateClose()], - wrap([Template(wrap([Text("foo")]), params=[Parameter(wrap([Text("bar")]), wrap([Text("baz")]))])])), + ([tokens.TemplateOpen(), tokens.Text(text="foobar"), + tokens.TemplateClose()], + wrap([Template(wrap([Text("foobar")]))])), + + ([tokens.TemplateOpen(), tokens.Text(text="spam"), + tokens.Text(text="eggs"), tokens.TemplateClose()], + wrap([Template(wrap([Text("spam"), Text("eggs")]))])), + + ([tokens.TemplateOpen(), tokens.Text(text="foo"), + tokens.TemplateParamSeparator(), tokens.Text(text="bar"), + tokens.TemplateClose()], + wrap([Template(wrap([Text("foo")]), params=[ + Parameter(wrap([Text("1")]), wrap([Text("bar")]), 
+ showkey=False)])])), + + ([tokens.TemplateOpen(), tokens.Text(text="foo"), + tokens.TemplateParamSeparator(), tokens.Text(text="bar"), + tokens.TemplateParamEquals(), tokens.Text(text="baz"), + tokens.TemplateClose()], + wrap([Template(wrap([Text("foo")]), params=[ + Parameter(wrap([Text("bar")]), wrap([Text("baz")]))])])), + + ([tokens.TemplateOpen(), tokens.Text(text="foo"), + tokens.TemplateParamSeparator(), tokens.Text(text="bar"), + tokens.TemplateParamEquals(), tokens.Text(text="baz"), + tokens.TemplateParamSeparator(), tokens.Text(text="biz"), + tokens.TemplateParamSeparator(), tokens.Text(text="buzz"), + tokens.TemplateParamSeparator(), tokens.Text(text="3"), + tokens.TemplateParamEquals(), tokens.Text(text="buff"), + tokens.TemplateParamSeparator(), tokens.Text(text="baff"), + tokens.TemplateClose()], + wrap([Template(wrap([Text("foo")]), params=[ + Parameter(wrap([Text("bar")]), wrap([Text("baz")])), + Parameter(wrap([Text("1")]), wrap([Text("biz")]), + showkey=False), + Parameter(wrap([Text("2")]), wrap([Text("buzz")]), + showkey=False), + Parameter(wrap([Text("3")]), wrap([Text("buff")])), + Parameter(wrap([Text("3")]), wrap([Text("baff")]), + showkey=False)])])), ] for test, valid in tests: self.assertWikicodeEqual(valid, self.builder.build(test)) From e32a6692f8ad9f8d6c57a56ca40e8aedf128c074 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 4 Apr 2013 10:59:16 -0400 Subject: [PATCH 070/115] test_argument() --- tests/test_builder.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/tests/test_builder.py b/tests/test_builder.py index 952b501..e632644 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -101,7 +101,29 @@ class TestBuilder(TreeEqualityTestCase): def test_argument(self): """tests for building Argument nodes""" - pass + tests = [ + ([tokens.ArgumentOpen(), tokens.Text(text="foobar"), + tokens.ArgumentClose()], + wrap([Argument(wrap([Text("foobar")]))])), + + 
([tokens.ArgumentOpen(), tokens.Text(text="spam"), + tokens.Text(text="eggs"), tokens.ArgumentClose()], + wrap([Argument(wrap([Text("spam"), Text("eggs")]))])), + + ([tokens.ArgumentOpen(), tokens.Text(text="foo"), + tokens.ArgumentSeparator(), tokens.Text(text="bar"), + tokens.ArgumentClose()], + wrap([Argument(wrap([Text("foo")]), wrap([Text("bar")]))])), + + ([tokens.ArgumentOpen(), tokens.Text(text="foo"), + tokens.Text(text="bar"), tokens.ArgumentSeparator(), + tokens.Text(text="baz"), tokens.Text(text="biz"), + tokens.ArgumentClose()], + wrap([Argument(wrap([Text("foo"), Text("bar")]), + wrap([Text("baz"), Text("biz")]))])), + ] + for test, valid in tests: + self.assertWikicodeEqual(valid, self.builder.build(test)) def test_wikilink(self): """tests for building Wikilink nodes""" From 7289d8c070a6fcd2bceaa8e00e7661c9c21461a5 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 5 Apr 2013 10:25:48 -0400 Subject: [PATCH 071/115] test_wikilink(); fix indentation --- tests/test_builder.py | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/tests/test_builder.py b/tests/test_builder.py index e632644..ea38dae 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -68,7 +68,7 @@ class TestBuilder(TreeEqualityTestCase): tokens.TemplateClose()], wrap([Template(wrap([Text("foo")]), params=[ Parameter(wrap([Text("1")]), wrap([Text("bar")]), - showkey=False)])])), + showkey=False)])])), ([tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateParamSeparator(), tokens.Text(text="bar"), @@ -89,12 +89,12 @@ class TestBuilder(TreeEqualityTestCase): wrap([Template(wrap([Text("foo")]), params=[ Parameter(wrap([Text("bar")]), wrap([Text("baz")])), Parameter(wrap([Text("1")]), wrap([Text("biz")]), - showkey=False), + showkey=False), Parameter(wrap([Text("2")]), wrap([Text("buzz")]), - showkey=False), + showkey=False), Parameter(wrap([Text("3")]), wrap([Text("buff")])), Parameter(wrap([Text("3")]), 
wrap([Text("baff")]), - showkey=False)])])), + showkey=False)])])), ] for test, valid in tests: self.assertWikicodeEqual(valid, self.builder.build(test)) @@ -127,7 +127,29 @@ class TestBuilder(TreeEqualityTestCase): def test_wikilink(self): """tests for building Wikilink nodes""" - pass + tests = [ + ([tokens.WikilinkOpen(), tokens.Text(text="foobar"), + tokens.WikilinkClose()], + wrap([Wikilink(wrap([Text("foobar")]))])), + + ([tokens.WikilinkOpen(), tokens.Text(text="spam"), + tokens.Text(text="eggs"), tokens.WikilinkClose()], + wrap([Wikilink(wrap([Text("spam"), Text("eggs")]))])), + + ([tokens.WikilinkOpen(), tokens.Text(text="foo"), + tokens.WikilinkSeparator(), tokens.Text(text="bar"), + tokens.WikilinkClose()], + wrap([Wikilink(wrap([Text("foo")]), wrap([Text("bar")]))])), + + ([tokens.WikilinkOpen(), tokens.Text(text="foo"), + tokens.Text(text="bar"), tokens.WikilinkSeparator(), + tokens.Text(text="baz"), tokens.Text(text="biz"), + tokens.WikilinkClose()], + wrap([Wikilink(wrap([Text("foo"), Text("bar")]), + wrap([Text("baz"), Text("biz")]))])), + ] + for test, valid in tests: + self.assertWikicodeEqual(valid, self.builder.build(test)) def test_html_entity(self): """tests for building HTMLEntity nodes""" From e9463543f46c49748740f69c5e5bcdb569338a2a Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 5 Apr 2013 10:46:43 -0400 Subject: [PATCH 072/115] test_html_entity() --- tests/test_builder.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/tests/test_builder.py b/tests/test_builder.py index ea38dae..7dcbc0e 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -153,7 +153,23 @@ class TestBuilder(TreeEqualityTestCase): def test_html_entity(self): """tests for building HTMLEntity nodes""" - pass + tests = [ + ([tokens.HTMLEntityStart(), tokens.Text(text="nbsp"), + tokens.HTMLEntityEnd()], + wrap([HTMLEntity("nbsp", named=True, hexadecimal=False)])), + + ([tokens.HTMLEntityStart(), 
tokens.HTMLEntityNumeric(), + tokens.Text(text="107"), tokens.HTMLEntityEnd()], + wrap([HTMLEntity("107", named=False, hexadecimal=False)])), + + ([tokens.HTMLEntityStart(), tokens.HTMLEntityNumeric(), + tokens.HTMLEntityHex(char="X"), tokens.Text(text="6B"), + tokens.HTMLEntityEnd()], + wrap([HTMLEntity("6B", named=False, hexadecimal=True, + hex_char="X")])), + ] + for test, valid in tests: + self.assertWikicodeEqual(valid, self.builder.build(test)) def test_heading(self): """tests for building Heading nodes""" From 132c6584d059497374c7f0c53285e6251beb6675 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 5 Apr 2013 10:52:43 -0400 Subject: [PATCH 073/115] test_heading() and test_comment() --- tests/test_builder.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/tests/test_builder.py b/tests/test_builder.py index 7dcbc0e..410eb4a 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -173,11 +173,31 @@ class TestBuilder(TreeEqualityTestCase): def test_heading(self): """tests for building Heading nodes""" - pass + tests = [ + ([tokens.HeadingStart(level=2), tokens.Text(text="foobar"), + tokens.HeadingEnd()], + wrap([Heading(wrap([Text("foobar")]), 2)])), + + ([tokens.HeadingStart(level=4), tokens.Text(text="spam"), + tokens.Text(text="eggs"), tokens.HeadingEnd()], + wrap([Heading(wrap([Text("spam"), Text("eggs")]), 4)])), + ] + for test, valid in tests: + self.assertWikicodeEqual(valid, self.builder.build(test)) def test_comment(self): """tests for building Comment nodes""" - pass + tests = [ + ([tokens.CommentStart(), tokens.Text(text="foobar"), + tokens.CommentEnd()], + wrap([Comment(wrap([Text("foobar")]))])), + + ([tokens.CommentStart(), tokens.Text(text="spam"), + tokens.Text(text="eggs"), tokens.CommentEnd()], + wrap([Comment(wrap([Text("spam"), Text("eggs")]))])), + ] + for test, valid in tests: + self.assertWikicodeEqual(valid, self.builder.build(test)) @unittest.skip("holding this until 
feature/html_tags is ready") def test_tag(self): From 094e867ee6d7a2f34c6555e318ccdb1622526484 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 6 Apr 2013 15:45:51 -0400 Subject: [PATCH 074/115] Add test_integration(); add a horrible abuse of PEP8 --- tests/_test_tree_equality.py | 2 +- tests/test_builder.py | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index 2014ac1..758a72e 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -74,7 +74,7 @@ class TreeEqualityTestCase(TestCase): self.assertEqual(expected.value, actual.value) self.assertIs(expected.named, actual.named) self.assertIs(expected.hexadecimal, actual.hexadecimal) - self.assertEquals(expected.hex_char, actual.hex_char) + self.assertEqual(expected.hex_char, actual.hex_char) def assertTagNodeEqual(self, expected, actual): """Assert that two Tag nodes have the same data.""" diff --git a/tests/test_builder.py b/tests/test_builder.py index 410eb4a..9425713 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -204,5 +204,25 @@ class TestBuilder(TreeEqualityTestCase): """tests for building Tag nodes""" pass + def test_integration(self): + """a test for building a combination of templates together""" + test = [tokens.TemplateOpen(), tokens.TemplateOpen(), + tokens.TemplateOpen(), tokens.TemplateOpen(), + tokens.Text(text="foo"), tokens.TemplateClose(), + tokens.Text(text="bar"), tokens.TemplateParamSeparator(), + tokens.Text(text="baz"), tokens.TemplateParamEquals(), + tokens.Text(text="biz"), tokens.TemplateClose(), + tokens.Text(text="buzz"), tokens.TemplateClose(), + tokens.Text(text="usr"), tokens.TemplateParamSeparator(), + tokens.TemplateOpen(), tokens.Text(text="bin"), + tokens.TemplateClose(), tokens.TemplateClose()] + valid = wrap( + [Template(wrap([Template(wrap([Template(wrap([Template(wrap([Text( + "foo")])), Text("bar")]), 
params=[Parameter(wrap([Text("baz")]), + wrap([Text("biz")]))]), Text("buzz")])), Text("usr")]), params=[ + Parameter(wrap([Text("1")]), wrap([Template(wrap([Text("bin")]))]), + showkey=False)])]) + self.assertWikicodeEqual(valid, self.builder.build(test)) + if __name__ == "__main__": unittest.main(verbosity=2) From 2d9b8a39b6509d8a39dcf12b90dbcb2e8f07433f Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 6 Apr 2013 16:17:47 -0400 Subject: [PATCH 075/115] test_integration2(); finish TestBuilder --- tests/test_builder.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tests/test_builder.py b/tests/test_builder.py index 9425713..1e578ed 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -206,6 +206,7 @@ class TestBuilder(TreeEqualityTestCase): def test_integration(self): """a test for building a combination of templates together""" + # {{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}} test = [tokens.TemplateOpen(), tokens.TemplateOpen(), tokens.TemplateOpen(), tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateClose(), @@ -224,5 +225,37 @@ class TestBuilder(TreeEqualityTestCase): showkey=False)])]) self.assertWikicodeEqual(valid, self.builder.build(test)) + def test_integration2(self): + """an even more audacious test for building a horrible wikicode mess""" + # {{a|b|{{c|[[d]]{{{e}}}}}}}[[f|{{{g}}}]]{{i|j= }} + test = [tokens.TemplateOpen(), tokens.Text(text="a"), + tokens.TemplateParamSeparator(), tokens.Text(text="b"), + tokens.TemplateParamSeparator(), tokens.TemplateOpen(), + tokens.Text(text="c"), tokens.TemplateParamSeparator(), + tokens.WikilinkOpen(), tokens.Text(text="d"), + tokens.WikilinkClose(), tokens.ArgumentOpen(), + tokens.Text(text="e"), tokens.ArgumentClose(), + tokens.TemplateClose(), tokens.TemplateClose(), + tokens.WikilinkOpen(), tokens.Text(text="f"), + tokens.WikilinkSeparator(), tokens.ArgumentOpen(), + tokens.Text(text="g"), tokens.ArgumentClose(), + tokens.CommentStart(), 
tokens.Text(text="h"), + tokens.CommentEnd(), tokens.WikilinkClose(), + tokens.TemplateOpen(), tokens.Text(text="i"), + tokens.TemplateParamSeparator(), tokens.Text(text="j"), + tokens.TemplateParamEquals(), tokens.HTMLEntityStart(), + tokens.Text(text="nbsp"), tokens.HTMLEntityEnd(), + tokens.TemplateClose()] + valid = wrap( + [Template(wrap([Text("a")]), params=[Parameter(wrap([Text("1")]), + wrap([Text("b")]), showkey=False), Parameter(wrap([Text("2")]), + wrap([Template(wrap([Text("c")]), params=[Parameter(wrap([Text("1") + ]), wrap([Wikilink(wrap([Text("d")])), Argument(wrap([Text("e")]))] + ), showkey=False)])]), showkey=False)]), Wikilink(wrap([Text("f")] + ), wrap([Argument(wrap([Text("g")])), Comment(wrap([Text("h")]))]) + ), Template(wrap([Text("i")]), params=[Parameter(wrap([Text("j")]), + wrap([HTMLEntity("nbsp", named=True)]))])]) + self.assertWikicodeEqual(valid, self.builder.build(test)) + if __name__ == "__main__": unittest.main(verbosity=2) From b0e3cd9cae58a0ac8490d2ee0c9b87e05de456b5 Mon Sep 17 00:00:00 2001 From: Kunal Mehta Date: Thu, 18 Apr 2013 18:23:08 -0500 Subject: [PATCH 076/115] Fix Pywikipedia references in documentation --- docs/integration.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/integration.rst b/docs/integration.rst index d0e54db..bd4e0ac 100644 --- a/docs/integration.rst +++ b/docs/integration.rst @@ -7,12 +7,12 @@ Integration :py:func:`mwparserfromhell.parse() ` on :py:meth:`~earwigbot.wiki.page.Page.get`. -If you're using PyWikipedia_, your code might look like this:: +If you're using Pywikipedia_, your code might look like this:: import mwparserfromhell import wikipedia as pywikibot def parse(title): - site = pywikibot.get_site() + site = pywikibot.getSite() page = pywikibot.Page(site, title) text = page.get() return mwparserfromhell.parse(text) @@ -31,5 +31,5 @@ following code (via the API_):: return mwparserfromhell.parse(text) .. _EarwigBot: https://github.com/earwig/earwigbot -.. 
_PyWikipedia: http://pywikipediabot.sourceforge.net/ +.. _Pywikipedia: http://pywikipediabot.sourceforge.net/ .. _API: http://mediawiki.org/wiki/API From 9c7517b22a6ff0f0ab8834b2e39bf56d886d6989 Mon Sep 17 00:00:00 2001 From: Kunal Mehta Date: Thu, 18 Apr 2013 18:32:51 -0500 Subject: [PATCH 077/115] Link to mediawiki.org instead of sf.net --- docs/integration.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/integration.rst b/docs/integration.rst index bd4e0ac..78810b8 100644 --- a/docs/integration.rst +++ b/docs/integration.rst @@ -31,5 +31,5 @@ following code (via the API_):: return mwparserfromhell.parse(text) .. _EarwigBot: https://github.com/earwig/earwigbot -.. _Pywikipedia: http://pywikipediabot.sourceforge.net/ +.. _Pywikipedia: https://www.mediawiki.org/wiki/Manual:Pywikipediabot .. _API: http://mediawiki.org/wiki/API From 6e399275263af3feae4bcd43ae17ccd5c7d2d1b6 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 18 Apr 2013 20:06:27 -0400 Subject: [PATCH 078/115] Update README with same changes (#27) --- README.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 90e896f..77262ca 100644 --- a/README.rst +++ b/README.rst @@ -107,12 +107,12 @@ Integration ``Page`` objects have a ``parse`` method that essentially calls ``mwparserfromhell.parse()`` on ``page.get()``. -If you're using PyWikipedia_, your code might look like this:: +If you're using Pywikipedia_, your code might look like this:: import mwparserfromhell import wikipedia as pywikibot def parse(title): - site = pywikibot.get_site() + site = pywikibot.getSite() page = pywikibot.Page(site, title) text = page.get() return mwparserfromhell.parse(text) @@ -138,5 +138,5 @@ following code (via the API_):: .. _Python Package Index: http://pypi.python.org .. _get pip: http://pypi.python.org/pypi/pip .. _EarwigBot: https://github.com/earwig/earwigbot -.. _PyWikipedia: http://pywikipediabot.sourceforge.net/ +.. 
_Pywikipedia: https://www.mediawiki.org/wiki/Manual:Pywikipediabot .. _API: http://mediawiki.org/wiki/API From 8db40689edf51d6febfaae3340fc6af6d34329ad Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 20 Apr 2013 17:59:20 -0400 Subject: [PATCH 079/115] Improve a few things about nodes; simply a method in Wikicode. --- mwparserfromhell/nodes/html_entity.py | 47 ++++++++++++++++++++++++++++------- mwparserfromhell/nodes/wikilink.py | 5 +++- mwparserfromhell/wikicode.py | 8 +----- 3 files changed, 43 insertions(+), 17 deletions(-) diff --git a/mwparserfromhell/nodes/html_entity.py b/mwparserfromhell/nodes/html_entity.py index 5b7607c..1bf1c78 100644 --- a/mwparserfromhell/nodes/html_entity.py +++ b/mwparserfromhell/nodes/html_entity.py @@ -63,7 +63,8 @@ class HTMLEntity(Node): return self.normalize() return self - def _unichr(self, value): + @staticmethod + def _unichr(value): """Implement the builtin unichr() with support for non-BMP code points. On wide Python builds, this functions like the normal unichr(). 
On @@ -119,19 +120,47 @@ class HTMLEntity(Node): @value.setter def value(self, newval): newval = str(newval) - if newval not in htmlentities.entitydefs: - test = int(self.value, 16) - if test < 0 or (test > 0x10FFFF and int(self.value) > 0x10FFFF): - raise ValueError(newval) + try: + int(newval) + except ValueError: + try: + int(newval, 16) + except ValueError: + if newval not in htmlentities.entitydefs: + raise ValueError("entity value is not a valid name") + self._named = True + self._hexadecimal = False + else: + if int(newval, 16) < 0 or int(newval, 16) > 0x10FFFF: + raise ValueError("entity value is not in range(0x110000)") + self._named = False + self._hexadecimal = True + else: + test = int(newval, 16 if self.hexadecimal else 10) + if test < 0 or test > 0x10FFFF: + raise ValueError("entity value is not in range(0x110000)") + self._named = False self._value = newval @named.setter def named(self, newval): - self._named = bool(newval) + newval = bool(newval) + if newval and self.value not in htmlentities.entitydefs: + raise ValueError("entity value is not a valid name") + if not newval: + try: + int(self.value, 16) + except ValueError: + err = "current entity value is not a valid Unicode codepoint" + raise ValueError(err) + self._named = newval @hexadecimal.setter def hexadecimal(self, newval): - self._hexadecimal = bool(newval) + newval = bool(newval) + if newval and self.named: + raise ValueError("a named entity cannot be hexadecimal") + self._hexadecimal = newval @hex_char.setter def hex_char(self, newval): @@ -145,5 +174,5 @@ class HTMLEntity(Node): if self.named: return unichr(htmlentities.name2codepoint[self.value]) if self.hexadecimal: - return self._unichr(int(self.value, 16)) - return self._unichr(int(self.value)) + return HTMLEntity._unichr(int(self.value, 16)) + return HTMLEntity._unichr(int(self.value)) diff --git a/mwparserfromhell/nodes/wikilink.py b/mwparserfromhell/nodes/wikilink.py index 6fea468..527e9bb 100644 --- 
a/mwparserfromhell/nodes/wikilink.py +++ b/mwparserfromhell/nodes/wikilink.py @@ -79,4 +79,7 @@ class Wikilink(Node): @text.setter def text(self, value): - self._text = parse_anything(value) + if value is None: + self._text = None + else: + self._text = parse_anything(value) diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index 8d8ebe2..f2d9c89 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -88,13 +88,7 @@ class Wikicode(StringMixIn): If *obj* is a ``Node``, the function will test whether they are the same object, otherwise it will compare them with ``==``. """ - if isinstance(obj, Node): - if node is obj: - return True - else: - if node == obj: - return True - return False + return (node is obj) if isinstance(obj, Node) else (node == obj) def _contains(self, nodes, obj): """Return ``True`` if *obj* is inside of *nodes*, else ``False``. From 5cf451eb22aa47b119183eb25de141627d0e1ef7 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 20 Apr 2013 18:01:39 -0400 Subject: [PATCH 080/115] Adding a bunch of tests for different nodes. 
--- tests/test_argument.py | 99 ++++++++++++++++++++++++++ tests/test_comment.py | 62 +++++++++++++++++ tests/test_heading.py | 88 ++++++++++++++++++++++++ tests/test_html_entity.py | 172 ++++++++++++++++++++++++++++++++++++++++++++++ tests/test_text.py | 69 +++++++++++++++++++ tests/test_wikilink.py | 99 ++++++++++++++++++++++++++ 6 files changed, 589 insertions(+) create mode 100644 tests/test_argument.py create mode 100644 tests/test_comment.py create mode 100644 tests/test_heading.py create mode 100644 tests/test_html_entity.py create mode 100644 tests/test_text.py create mode 100644 tests/test_wikilink.py diff --git a/tests/test_argument.py b/tests/test_argument.py new file mode 100644 index 0000000..e0524c4 --- /dev/null +++ b/tests/test_argument.py @@ -0,0 +1,99 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import str +from mwparserfromhell.nodes import Argument, Text +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.wikicode import Wikicode + +from ._test_tree_equality import TreeEqualityTestCase + +wrap = lambda L: Wikicode(SmartList(L)) + +class TestArgument(TreeEqualityTestCase): + """Test cases for the Argument node.""" + + def test_unicode(self): + """test Argument.__unicode__()""" + node = Argument(wrap([Text("foobar")])) + self.assertEqual("{{{foobar}}}", str(node)) + node2 = Argument(wrap([Text("foo")]), wrap([Text("bar")])) + self.assertEqual("{{{foo|bar}}}", str(node2)) + + def test_strip(self): + """test Argument.__strip__()""" + node = Argument(wrap([Text("foobar")])) + self.assertIs(None, node.__strip__(True, True)) + self.assertIs(None, node.__strip__(True, False)) + self.assertIs(None, node.__strip__(False, True)) + self.assertIs(None, node.__strip__(False, False)) + + node2 = Argument(wrap([Text("foo")]), wrap([Text("bar")])) + self.assertEqual("bar", node2.__strip__(True, True)) + self.assertEqual("bar", node2.__strip__(True, False)) + self.assertEqual("bar", node2.__strip__(False, True)) + self.assertEqual("bar", node2.__strip__(False, False)) + + def test_showtree(self): + """test Argument.__showtree__()""" + output = [] + getter, marker = object(), object() + get = lambda code: output.append((getter, code)) + mark = lambda: output.append(marker) + node1 = Argument(wrap([Text("foobar")])) + node2 = Argument(wrap([Text("foo")]), wrap([Text("bar")])) + node1.__showtree__(output.append, get, mark) + node2.__showtree__(output.append, get, mark) + valid = [ + "{{{", (getter, node1.name), "}}}", "{{{", (getter, node2.name), + " | ", marker, (getter, node2.default), "}}}"] + self.assertEqual(valid, output) + + def test_name(self): + """test getter/setter for the name attribute""" + name = wrap([Text("foobar")]) + node1 = Argument(name) + 
node2 = Argument(name, wrap([Text("baz")])) + self.assertIs(name, node1.name) + self.assertIs(name, node2.name) + node1.name = "héhehé" + node2.name = "héhehé" + self.assertWikicodeEqual(wrap([Text("héhehé")]), node1.name) + self.assertWikicodeEqual(wrap([Text("héhehé")]), node2.name) + + def test_default(self): + """test getter/setter for the default attribute""" + default = wrap([Text("baz")]) + node1 = Argument(wrap([Text("foobar")])) + node2 = Argument(wrap([Text("foobar")]), default) + self.assertIs(None, node1.default) + self.assertIs(default, node2.default) + node1.default = "buzz" + node2.default = None + self.assertWikicodeEqual(wrap([Text("buzz")]), node1.default) + self.assertIs(None, node2.default) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_comment.py b/tests/test_comment.py new file mode 100644 index 0000000..980f594 --- /dev/null +++ b/tests/test_comment.py @@ -0,0 +1,62 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import str +from mwparserfromhell.nodes import Comment + +from ._test_tree_equality import TreeEqualityTestCase + +class TestComment(TreeEqualityTestCase): + """Test cases for the Comment node.""" + + def test_unicode(self): + """test Comment.__unicode__()""" + node = Comment("foobar") + self.assertEqual("", str(node)) + + def test_strip(self): + """test Comment.__strip__()""" + node = Comment("foobar") + self.assertIs(None, node.__strip__(True, True)) + self.assertIs(None, node.__strip__(True, False)) + self.assertIs(None, node.__strip__(False, True)) + self.assertIs(None, node.__strip__(False, False)) + + def test_showtree(self): + """test Comment.__showtree__()""" + output = [] + node = Comment("foobar") + node.__showtree__(output.append, None, None) + self.assertEqual([""], output) + + def test_contents(self): + """test getter/setter for the contents attribute""" + node = Comment("foobar") + self.assertEqual("foobar", node.contents) + node.contents = "barfoo" + self.assertEqual("barfoo", node.contents) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_heading.py b/tests/test_heading.py new file mode 100644 index 0000000..a0e78e5 --- /dev/null +++ b/tests/test_heading.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# 
copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import str +from mwparserfromhell.nodes import Heading, Text +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.wikicode import Wikicode + +from ._test_tree_equality import TreeEqualityTestCase + +wrap = lambda L: Wikicode(SmartList(L)) + +class TestHeading(TreeEqualityTestCase): + """Test cases for the Heading node.""" + + def test_unicode(self): + """test Heading.__unicode__()""" + node = Heading(wrap([Text("foobar")]), 2) + self.assertEqual("==foobar==", str(node)) + node2 = Heading(wrap([Text(" zzz ")]), 5) + self.assertEqual("===== zzz =====", str(node2)) + + def test_strip(self): + """test Heading.__strip__()""" + node = Heading(wrap([Text("foobar")]), 3) + self.assertEqual("foobar", node.__strip__(True, True)) + self.assertEqual("foobar", node.__strip__(True, False)) + self.assertEqual("foobar", node.__strip__(False, True)) + self.assertEqual("foobar", node.__strip__(False, False)) + + def test_showtree(self): + """test Heading.__showtree__()""" + output = [] + getter = object() + get = lambda code: output.append((getter, code)) + node1 = Heading(wrap([Text("foobar")]), 3) + node2 = Heading(wrap([Text(" baz 
")]), 4) + node1.__showtree__(output.append, get, None) + node2.__showtree__(output.append, get, None) + valid = ["===", (getter, node1.title), "===", + "====", (getter, node2.title), "===="] + self.assertEqual(valid, output) + + def test_title(self): + """test getter/setter for the title attribute""" + title = wrap([Text("foobar")]) + node = Heading(title, 3) + self.assertIs(title, node.title) + node.title = "héhehé" + self.assertWikicodeEqual(wrap([Text("héhehé")]), node.title) + + def test_level(self): + """test getter/setter for the level attribute""" + node = Heading(wrap([Text("foobar")]), 3) + self.assertEqual(3, node.level) + node.level = 5 + self.assertEqual(5, node.level) + node.level = True + self.assertEqual(1, node.level) + self.assertRaises(ValueError, setattr, node, "level", 0) + self.assertRaises(ValueError, setattr, node, "level", 7) + self.assertRaises(ValueError, setattr, node, "level", "abc") + self.assertRaises(ValueError, setattr, node, "level", False) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py new file mode 100644 index 0000000..20c8fc0 --- /dev/null +++ b/tests/test_html_entity.py @@ -0,0 +1,172 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import str +from mwparserfromhell.nodes import HTMLEntity +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.wikicode import Wikicode + +from ._test_tree_equality import TreeEqualityTestCase + +wrap = lambda L: Wikicode(SmartList(L)) + +class TestHTMLEntity(TreeEqualityTestCase): + """Test cases for the HTMLEntity node.""" + + def test_unicode(self): + """test HTMLEntity.__unicode__()""" + node1 = HTMLEntity("nbsp", named=True, hexadecimal=False) + node2 = HTMLEntity("107", named=False, hexadecimal=False) + node3 = HTMLEntity("6b", named=False, hexadecimal=True) + node4 = HTMLEntity("6C", named=False, hexadecimal=True, hex_char="X") + self.assertEqual(" ", str(node1)) + self.assertEqual("k", str(node2)) + self.assertEqual("k", str(node3)) + self.assertEqual("l", str(node4)) + + def test_strip(self): + """test HTMLEntity.__strip__()""" + node1 = HTMLEntity("nbsp", named=True, hexadecimal=False) + node2 = HTMLEntity("107", named=False, hexadecimal=False) + node3 = HTMLEntity("e9", named=False, hexadecimal=True) + + self.assertEqual("\xa0", node1.__strip__(True, True)) + self.assertEqual("\xa0", node1.__strip__(True, False)) + self.assertEqual(" ", node1.__strip__(False, True)) + self.assertEqual(" ", node1.__strip__(False, False)) + self.assertEqual("k", node2.__strip__(True, True)) + self.assertEqual("k", node2.__strip__(True, False)) + self.assertEqual("k", 
node2.__strip__(False, True)) + self.assertEqual("k", node2.__strip__(False, False)) + self.assertEqual("é", node3.__strip__(True, True)) + self.assertEqual("é", node3.__strip__(True, False)) + self.assertEqual("é", node3.__strip__(False, True)) + self.assertEqual("é", node3.__strip__(False, False)) + + def test_showtree(self): + """test HTMLEntity.__showtree__()""" + output = [] + node1 = HTMLEntity("nbsp", named=True, hexadecimal=False) + node2 = HTMLEntity("107", named=False, hexadecimal=False) + node3 = HTMLEntity("e9", named=False, hexadecimal=True) + node1.__showtree__(output.append, None, None) + node2.__showtree__(output.append, None, None) + node3.__showtree__(output.append, None, None) + res = [" ", "k", "é"] + self.assertEqual(res, output) + + def test_value(self): + """test HTMLEntity.value()""" + node1 = HTMLEntity("nbsp") + node2 = HTMLEntity("107") + node3 = HTMLEntity("e9") + self.assertEquals("nbsp", node1.value) + self.assertEquals("107", node2.value) + self.assertEquals("e9", node3.value) + + node1.value = "ffa4" + node2.value = 72 + node3.value = "Sigma" + self.assertEquals("ffa4", node1.value) + self.assertFalse(node1.named) + self.assertTrue(node1.hexadecimal) + self.assertEquals("72", node2.value) + self.assertFalse(node2.named) + self.assertFalse(node2.hexadecimal) + self.assertEquals("Sigma", node3.value) + self.assertTrue(node3.named) + self.assertFalse(node3.hexadecimal) + + node1.value = "10FFFF" + node2.value = 110000 + node2.value = 1114111 + self.assertRaises(ValueError, setattr, node3, "value", "") + self.assertRaises(ValueError, setattr, node3, "value", "foobar") + self.assertRaises(ValueError, setattr, node3, "value", True) + self.assertRaises(ValueError, setattr, node3, "value", -1) + self.assertRaises(ValueError, setattr, node1, "value", 110000) + self.assertRaises(ValueError, setattr, node1, "value", "1114112") + + def test_named(self): + """test HTMLEntity.named()""" + node1 = HTMLEntity("nbsp") + node2 = HTMLEntity("107") + 
node3 = HTMLEntity("e9") + self.assertTrue(node1.named) + self.assertFalse(node2.named) + self.assertFalse(node3.named) + node1.named = 1 + node2.named = 0 + node3.named = 0 + self.assertTrue(node1.named) + self.assertFalse(node2.named) + self.assertFalse(node3.named) + self.assertRaises(ValueError, setattr, node1, "named", False) + self.assertRaises(ValueError, setattr, node2, "named", True) + self.assertRaises(ValueError, setattr, node3, "named", True) + + def test_hexadecimal(self): + """test HTMLEntity.hexadecimal()""" + node1 = HTMLEntity("nbsp") + node2 = HTMLEntity("107") + node3 = HTMLEntity("e9") + self.assertFalse(node1.hexadecimal) + self.assertFalse(node2.hexadecimal) + self.assertTrue(node3.hexadecimal) + node1.hexadecimal = False + node2.hexadecimal = True + node3.hexadecimal = False + self.assertFalse(node1.hexadecimal) + self.assertTrue(node2.hexadecimal) + self.assertFalse(node3.hexadecimal) + self.assertRaises(ValueError, setattr, node1, "hexadecimal", True) + + def test_hex_char(self): + """test HTMLEntity.hex_char()""" + node1 = HTMLEntity("e9") + node2 = HTMLEntity("e9", hex_char="X") + self.assertEquals("x", node1.hex_char) + self.assertEquals("X", node2.hex_char) + node1.hex_char = "X" + node2.hex_char = "x" + self.assertEquals("X", node1.hex_char) + self.assertEquals("x", node2.hex_char) + self.assertRaises(ValueError, setattr, node1, "hex_char", 123) + self.assertRaises(ValueError, setattr, node1, "hex_char", "foobar") + self.assertRaises(ValueError, setattr, node1, "hex_char", True) + + def test_normalize(self): + """test HTMLEntity.normalize()""" + node1 = HTMLEntity("nbsp") + node2 = HTMLEntity("107") + node3 = HTMLEntity("e9") + node4 = HTMLEntity("1f648") + self.assertEquals("\xa0", node1.normalize()) + self.assertEquals("k", node2.normalize()) + self.assertEquals("é", node3.normalize()) + self.assertEquals("\U0001F648", node4.normalize()) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_text.py 
b/tests/test_text.py new file mode 100644 index 0000000..13636bf --- /dev/null +++ b/tests/test_text.py @@ -0,0 +1,69 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import str +from mwparserfromhell.nodes import Text + +class TestText(unittest.TestCase): + """Test cases for the Text node.""" + + def test_unicode(self): + """test Text.__unicode__()""" + node = Text("foobar") + self.assertEqual("foobar", str(node)) + node2 = Text("fóóbar") + self.assertEqual("fóóbar", str(node2)) + + def test_strip(self): + """test Text.__strip__()""" + node = Text("foobar") + self.assertIs(node, node.__strip__(True, True)) + self.assertIs(node, node.__strip__(True, False)) + self.assertIs(node, node.__strip__(False, True)) + self.assertIs(node, node.__strip__(False, False)) + + def test_showtree(self): + """test Text.__showtree__()""" + output = [] + node1 = Text("foobar") + node2 = Text("fóóbar") + node3 = Text("𐌲𐌿𐍄") + node1.__showtree__(output.append, None, None) + node2.__showtree__(output.append, None, None) + node3.__showtree__(output.append, None, None) + res = ["foobar", r"f\xf3\xf3bar", "\\U00010332\\U0001033f\\U00010344"] + self.assertEqual(res, output) + + def test_value(self): + """test getter/setter for the value attribute""" + node = Text("foobar") + self.assertEqual("foobar", node.value) + self.assertIsInstance(node.value, str) + node.value = "héhéhé" + self.assertEqual("héhéhé", node.value) + self.assertIsInstance(node.value, str) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/test_wikilink.py b/tests/test_wikilink.py new file mode 100644 index 0000000..422489f --- /dev/null +++ b/tests/test_wikilink.py @@ -0,0 +1,99 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# 
copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import str +from mwparserfromhell.nodes import Text, Wikilink +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.wikicode import Wikicode + +from ._test_tree_equality import TreeEqualityTestCase + +wrap = lambda L: Wikicode(SmartList(L)) + +class TestWikilink(TreeEqualityTestCase): + """Test cases for the Wikilink node.""" + + def test_unicode(self): + """test Wikilink.__unicode__()""" + node = Wikilink(wrap([Text("foobar")])) + self.assertEqual("[[foobar]]", str(node)) + node2 = Wikilink(wrap([Text("foo")]), wrap([Text("bar")])) + self.assertEqual("[[foo|bar]]", str(node2)) + + def test_strip(self): + """test Wikilink.__strip__()""" + node = Wikilink(wrap([Text("foobar")])) + self.assertEqual("foobar", node.__strip__(True, True)) + self.assertEqual("foobar", node.__strip__(True, False)) + self.assertEqual("foobar", node.__strip__(False, True)) + self.assertEqual("foobar", node.__strip__(False, False)) + + node2 = Wikilink(wrap([Text("foo")]), wrap([Text("bar")])) + self.assertEqual("bar", node2.__strip__(True, True)) + self.assertEqual("bar", node2.__strip__(True, False)) + self.assertEqual("bar", 
node2.__strip__(False, True)) + self.assertEqual("bar", node2.__strip__(False, False)) + + def test_showtree(self): + """test Wikilink.__showtree__()""" + output = [] + getter, marker = object(), object() + get = lambda code: output.append((getter, code)) + mark = lambda: output.append(marker) + node1 = Wikilink(wrap([Text("foobar")])) + node2 = Wikilink(wrap([Text("foo")]), wrap([Text("bar")])) + node1.__showtree__(output.append, get, mark) + node2.__showtree__(output.append, get, mark) + valid = [ + "[[", (getter, node1.title), "]]", "[[", (getter, node2.title), + " | ", marker, (getter, node2.text), "]]"] + self.assertEqual(valid, output) + + def test_title(self): + """test getter/setter for the title attribute""" + title = wrap([Text("foobar")]) + node1 = Wikilink(title) + node2 = Wikilink(title, wrap([Text("baz")])) + self.assertIs(title, node1.title) + self.assertIs(title, node2.title) + node1.title = "héhehé" + node2.title = "héhehé" + self.assertWikicodeEqual(wrap([Text("héhehé")]), node1.title) + self.assertWikicodeEqual(wrap([Text("héhehé")]), node2.title) + + def test_text(self): + """test getter/setter for the text attribute""" + text = wrap([Text("baz")]) + node1 = Wikilink(wrap([Text("foobar")])) + node2 = Wikilink(wrap([Text("foobar")]), text) + self.assertIs(None, node1.text) + self.assertIs(text, node2.text) + node1.text = "buzz" + node2.text = None + self.assertWikicodeEqual(wrap([Text("buzz")]), node1.text) + self.assertIs(None, node2.text) + +if __name__ == "__main__": + unittest.main(verbosity=2) From 6bf8cfd2adcf536113f3a9ace3901b08540d7ff9 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 20 Apr 2013 18:09:34 -0400 Subject: [PATCH 081/115] Fix tests for Python 3. 
--- mwparserfromhell/nodes/html_entity.py | 55 ++++++++++++++++++----------------- tests/test_html_entity.py | 28 +++++++++--------- 2 files changed, 43 insertions(+), 40 deletions(-) diff --git a/mwparserfromhell/nodes/html_entity.py b/mwparserfromhell/nodes/html_entity.py index 1bf1c78..b51bd92 100644 --- a/mwparserfromhell/nodes/html_entity.py +++ b/mwparserfromhell/nodes/html_entity.py @@ -23,7 +23,7 @@ from __future__ import unicode_literals from . import Node -from ..compat import htmlentities, str +from ..compat import htmlentities, py3k, str __all__ = ["HTMLEntity"] @@ -63,29 +63,31 @@ class HTMLEntity(Node): return self.normalize() return self - @staticmethod - def _unichr(value): - """Implement the builtin unichr() with support for non-BMP code points. + if not py3k: + @staticmethod + def _unichr(value): + """Implement builtin unichr() with support for non-BMP code points. - On wide Python builds, this functions like the normal unichr(). On - narrow builds, this returns the value's corresponding surrogate pair. - """ - try: - return unichr(value) - except ValueError: - # Test whether we're on the wide or narrow Python build. Check the - # length of a non-BMP code point (U+1F64A, SPEAK-NO-EVIL MONKEY): - if len("\U0001F64A") == 2: - # Ensure this is within the range we can encode: - if value > 0x10FFFF: - raise ValueError("unichr() arg not in range(0x110000)") - code = value - 0x10000 - if value < 0: # Invalid code point - raise - lead = 0xD800 + (code >> 10) - trail = 0xDC00 + (code % (1 << 10)) - return unichr(lead) + unichr(trail) - raise + On wide Python builds, this functions like the normal unichr(). On + narrow builds, this returns the value's encoded surrogate pair. + """ + try: + return unichr(value) + except ValueError: + # Test whether we're on the wide or narrow Python build. 
Check + # the length of a non-BMP code point + # (U+1F64A, SPEAK-NO-EVIL MONKEY): + if len("\U0001F64A") == 2: + # Ensure this is within the range we can encode: + if value > 0x10FFFF: + raise ValueError("unichr() arg not in range(0x110000)") + code = value - 0x10000 + if value < 0: # Invalid code point + raise + lead = 0xD800 + (code >> 10) + trail = 0xDC00 + (code % (1 << 10)) + return unichr(lead) + unichr(trail) + raise @property def value(self): @@ -171,8 +173,9 @@ class HTMLEntity(Node): def normalize(self): """Return the unicode character represented by the HTML entity.""" + chrfunc = chr if py3k else HTMLEntity._unichr if self.named: - return unichr(htmlentities.name2codepoint[self.value]) + return chrfunc(htmlentities.name2codepoint[self.value]) if self.hexadecimal: - return HTMLEntity._unichr(int(self.value, 16)) - return HTMLEntity._unichr(int(self.value)) + return chrfunc(int(self.value, 16)) + return chrfunc(int(self.value)) diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py index 20c8fc0..4bf32e8 100644 --- a/tests/test_html_entity.py +++ b/tests/test_html_entity.py @@ -82,20 +82,20 @@ class TestHTMLEntity(TreeEqualityTestCase): node1 = HTMLEntity("nbsp") node2 = HTMLEntity("107") node3 = HTMLEntity("e9") - self.assertEquals("nbsp", node1.value) - self.assertEquals("107", node2.value) - self.assertEquals("e9", node3.value) + self.assertEqual("nbsp", node1.value) + self.assertEqual("107", node2.value) + self.assertEqual("e9", node3.value) node1.value = "ffa4" node2.value = 72 node3.value = "Sigma" - self.assertEquals("ffa4", node1.value) + self.assertEqual("ffa4", node1.value) self.assertFalse(node1.named) self.assertTrue(node1.hexadecimal) - self.assertEquals("72", node2.value) + self.assertEqual("72", node2.value) self.assertFalse(node2.named) self.assertFalse(node2.hexadecimal) - self.assertEquals("Sigma", node3.value) + self.assertEqual("Sigma", node3.value) self.assertTrue(node3.named) self.assertFalse(node3.hexadecimal) @@ -147,12 
+147,12 @@ class TestHTMLEntity(TreeEqualityTestCase): """test HTMLEntity.hex_char()""" node1 = HTMLEntity("e9") node2 = HTMLEntity("e9", hex_char="X") - self.assertEquals("x", node1.hex_char) - self.assertEquals("X", node2.hex_char) + self.assertEqual("x", node1.hex_char) + self.assertEqual("X", node2.hex_char) node1.hex_char = "X" node2.hex_char = "x" - self.assertEquals("X", node1.hex_char) - self.assertEquals("x", node2.hex_char) + self.assertEqual("X", node1.hex_char) + self.assertEqual("x", node2.hex_char) self.assertRaises(ValueError, setattr, node1, "hex_char", 123) self.assertRaises(ValueError, setattr, node1, "hex_char", "foobar") self.assertRaises(ValueError, setattr, node1, "hex_char", True) @@ -163,10 +163,10 @@ class TestHTMLEntity(TreeEqualityTestCase): node2 = HTMLEntity("107") node3 = HTMLEntity("e9") node4 = HTMLEntity("1f648") - self.assertEquals("\xa0", node1.normalize()) - self.assertEquals("k", node2.normalize()) - self.assertEquals("é", node3.normalize()) - self.assertEquals("\U0001F648", node4.normalize()) + self.assertEqual("\xa0", node1.normalize()) + self.assertEqual("k", node2.normalize()) + self.assertEqual("é", node3.normalize()) + self.assertEqual("\U0001F648", node4.normalize()) if __name__ == "__main__": unittest.main(verbosity=2) From 6a385b392190d9c4ce89c8cc8777efcb587972f5 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 20 Apr 2013 21:44:44 -0400 Subject: [PATCH 082/115] TestParameter and a fair chunk of TestTemplate. 
--- mwparserfromhell/nodes/template.py | 9 ++- tests/test_html_entity.py | 10 +-- tests/test_parameter.py | 79 +++++++++++++++++++++ tests/test_template.py | 140 +++++++++++++++++++++++++++++++++++++ 4 files changed, 228 insertions(+), 10 deletions(-) create mode 100644 tests/test_parameter.py create mode 100644 tests/test_template.py diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index e34ba7a..eb7f3a8 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -183,11 +183,10 @@ class Template(Node): def get(self, name): """Get the parameter whose name is *name*. - The returned object is a - :py:class:`~.Parameter` instance. Raises :py:exc:`ValueError` if no - parameter has this name. Since multiple parameters can have the same - name, we'll return the last match, since the last parameter is the only - one read by the MediaWiki parser. + The returned object is a :py:class:`~.Parameter` instance. Raises + :py:exc:`ValueError` if no parameter has this name. Since multiple + parameters can have the same name, we'll return the last match, since + the last parameter is the only one read by the MediaWiki parser. 
""" name = name.strip() if isinstance(name, basestring) else str(name) for param in reversed(self.params): diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py index 4bf32e8..a7a9669 100644 --- a/tests/test_html_entity.py +++ b/tests/test_html_entity.py @@ -78,7 +78,7 @@ class TestHTMLEntity(TreeEqualityTestCase): self.assertEqual(res, output) def test_value(self): - """test HTMLEntity.value()""" + """test getter/setter for the value attribute""" node1 = HTMLEntity("nbsp") node2 = HTMLEntity("107") node3 = HTMLEntity("e9") @@ -110,7 +110,7 @@ class TestHTMLEntity(TreeEqualityTestCase): self.assertRaises(ValueError, setattr, node1, "value", "1114112") def test_named(self): - """test HTMLEntity.named()""" + """test getter/setter for the named attribute""" node1 = HTMLEntity("nbsp") node2 = HTMLEntity("107") node3 = HTMLEntity("e9") @@ -128,7 +128,7 @@ class TestHTMLEntity(TreeEqualityTestCase): self.assertRaises(ValueError, setattr, node3, "named", True) def test_hexadecimal(self): - """test HTMLEntity.hexadecimal()""" + """test getter/setter for the hexadecimal attribute""" node1 = HTMLEntity("nbsp") node2 = HTMLEntity("107") node3 = HTMLEntity("e9") @@ -144,7 +144,7 @@ class TestHTMLEntity(TreeEqualityTestCase): self.assertRaises(ValueError, setattr, node1, "hexadecimal", True) def test_hex_char(self): - """test HTMLEntity.hex_char()""" + """test getter/setter for the hex_char attribute""" node1 = HTMLEntity("e9") node2 = HTMLEntity("e9", hex_char="X") self.assertEqual("x", node1.hex_char) @@ -158,7 +158,7 @@ class TestHTMLEntity(TreeEqualityTestCase): self.assertRaises(ValueError, setattr, node1, "hex_char", True) def test_normalize(self): - """test HTMLEntity.normalize()""" + """test getter/setter for the normalize attribute""" node1 = HTMLEntity("nbsp") node2 = HTMLEntity("107") node3 = HTMLEntity("e9") diff --git a/tests/test_parameter.py b/tests/test_parameter.py new file mode 100644 index 0000000..b46ad71 --- /dev/null +++ 
b/tests/test_parameter.py @@ -0,0 +1,79 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import str +from mwparserfromhell.nodes import Text +from mwparserfromhell.nodes.extras import Parameter +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.wikicode import Wikicode + +from ._test_tree_equality import TreeEqualityTestCase + +wrap = lambda L: Wikicode(SmartList(L)) + +class TestParameter(TreeEqualityTestCase): + """Test cases for the Parameter node extra.""" + + def test_unicode(self): + """test Parameter.__unicode__()""" + node = Parameter(wrap([Text("1")]), wrap([Text("foo")]), showkey=False) + self.assertEqual("foo", str(node)) + node2 = Parameter(wrap([Text("foo")]), wrap([Text("bar")])) + self.assertEqual("foo=bar", str(node2)) + + def test_name(self): + """test getter/setter for the name attribute""" + name1 = wrap([Text("1")]) + name2 = wrap([Text("foobar")]) + node1 = Parameter(name1, wrap([Text("foobar")]), showkey=False) + node2 = Parameter(name2, wrap([Text("baz")])) + self.assertIs(name1, node1.name) + self.assertIs(name2, node2.name) + node1.name = "héhehé" + node2.name = "héhehé" + self.assertWikicodeEqual(wrap([Text("héhehé")]), node1.name) + self.assertWikicodeEqual(wrap([Text("héhehé")]), node2.name) + + def test_value(self): + """test getter/setter for the value attribute""" + value = wrap([Text("bar")]) + node = Parameter(wrap([Text("foo")]), value) + self.assertIs(value, node.value) + node.value = "héhehé" + self.assertWikicodeEqual(wrap([Text("héhehé")]), node.value) + + def test_showkey(self): + """test getter/setter for the showkey attribute""" + node1 = Parameter(wrap([Text("1")]), wrap([Text("foo")]), showkey=False) + node2 = Parameter(wrap([Text("foo")]), wrap([Text("bar")])) + self.assertFalse(node1.showkey) + self.assertTrue(node2.showkey) + node1.showkey = True + node2.showkey = "" + self.assertTrue(node1.showkey) + self.assertFalse(node2.showkey) + +if __name__ == "__main__": + unittest.main(verbosity=2) 
diff --git a/tests/test_template.py b/tests/test_template.py new file mode 100644 index 0000000..fde7522 --- /dev/null +++ b/tests/test_template.py @@ -0,0 +1,140 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.compat import str +from mwparserfromhell.nodes import Template, Text +from mwparserfromhell.nodes.extras import Parameter +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.wikicode import Wikicode +from ._test_tree_equality import TreeEqualityTestCase + +wrap = lambda L: Wikicode(SmartList(L)) +pgens = lambda k, v: Parameter(wrap([Text(k)]), wrap([Text(v)]), True) +pgenh = lambda k, v: Parameter(wrap([Text(k)]), wrap([Text(v)]), False) + +class TestTemplate(TreeEqualityTestCase): + """Test cases for the Template node.""" + + def test_unicode(self): + """test Template.__unicode__()""" + node = Template(wrap([Text("foobar")])) + self.assertEqual("{{foobar}}", str(node)) + node2 = Template(wrap([Text("foo")]), + [pgenh("1", "bar"), pgens("abc", "def")]) + self.assertEqual("{{foo|bar|abc=def}}", str(node2)) + + def test_strip(self): + """test Template.__strip__()""" + node1 = Template(wrap([Text("foobar")])) + node2 = Template(wrap([Text("foo")]), + [pgenh("1", "bar"), pgens("abc", "def")]) + for a in (True, False): + for b in (True, False): + self.assertEqual(None, node1.__strip__(a, b)) + self.assertEqual(None, node2.__strip__(a, b)) + + def test_showtree(self): + """test Template.__showtree__()""" + output = [] + getter, marker = object(), object() + get = lambda code: output.append((getter, code)) + mark = lambda: output.append(marker) + node1 = Template(wrap([Text("foobar")])) + node2 = Template(wrap([Text("foo")]), + [pgenh("1", "bar"), pgens("abc", "def")]) + node1.__showtree__(output.append, get, mark) + node2.__showtree__(output.append, get, mark) + valid = [ + "{{", (getter, node1.name), "}}", "{{", (getter, node2.name), + " | ", marker, (getter, node2.params[0].name), " = ", marker, + (getter, node2.params[0].value), " | ", marker, + (getter, node2.params[1].name), " = ", marker, + (getter, node2.params[1].value), "}}"] + self.assertEqual(valid, 
output) + + def test_name(self): + """test getter/setter for the name attribute""" + name = wrap([Text("foobar")]) + node1 = Template(name) + node2 = Template(name, [pgenh("1", "bar")]) + self.assertIs(name, node1.name) + self.assertIs(name, node2.name) + node1.name = "asdf" + node2.name = "téstïng" + self.assertWikicodeEqual(wrap([Text("asdf")]), node1.name) + self.assertWikicodeEqual(wrap([Text("téstïng")]), node2.name) + + def test_params(self): + """test getter for the params attribute""" + node1 = Template(wrap([Text("foobar")])) + plist = [pgenh("1", "bar"), pgens("abc", "def")] + node2 = Template(wrap([Text("foo")]), plist) + self.assertEqual([], node1.params) + self.assertIs(plist, node2.params) + + def test_has_param(self): + """test Template.has_param()""" + node1 = Template(wrap([Text("foobar")])) + node2 = Template(wrap([Text("foo")]), + [pgenh("1", "bar"), pgens("abc", "def")]) + node3 = Template(wrap([Text("foo")]), + [pgenh("1", "a"), pgens("b", "c"), pgens("1", "d")]) + node4 = Template(wrap([Text("foo")]), + [pgenh("1", "a"), pgens("b", " ")]) + self.assertFalse(node1.has_param("foobar")) + self.assertTrue(node2.has_param(1)) + self.assertTrue(node2.has_param("abc")) + self.assertFalse(node2.has_param("def")) + self.assertTrue(node3.has_param("1")) + self.assertTrue(node3.has_param("b")) + self.assertFalse(node4.has_param("b")) + self.assertTrue(node3.has_param("b", False)) + self.assertTrue(node4.has_param("b", False)) + + def test_get(self): + """test Template.get()""" + node1 = Template(wrap([Text("foobar")])) + node2p1 = pgenh("1", "bar") + node2p2 = pgens("abc", "def") + node2 = Template(wrap([Text("foo")]), [node2p1, node2p2]) + node3p1 = pgens("b", "c") + node3p2 = pgens("1", "d") + node3 = Template(wrap([Text("foo")]), + [pgenh("1", "a"), node3p1, node3p2]) + node4p1 = pgens("b", " ") + node4 = Template(wrap([Text("foo")]), [pgenh("1", "a"), node4p1]) + self.assertRaises(ValueError, node1.get, "foobar") + self.assertIs(node2p1, 
node2.get(1)) + self.assertIs(node2p2, node2.get("abc")) + self.assertRaises(ValueError, node2.get, "def") + self.assertIs(node3p1, node3.get("b")) + self.assertIs(node3p2, node3.get("1")) + self.assertIs(node4p1, node4.get("b")) + + # add + # remove + +if __name__ == "__main__": + unittest.main(verbosity=2) From debcb6577e80cb5c371513e73bb82f1d2c107ec1 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 21 Apr 2013 20:50:05 -0400 Subject: [PATCH 083/115] Fix recursion issues by giving up at a certain point (closes #16). - Stop parsing new templates if the template depth gets above MAX_DEPTH (40) or if we've already tried to parse over MAX_CYCLES (100,000) templates. - Add two tests to ensure recursion works somewhat correctly. - Fix parsing the string "{{" with the Python tokenizer; add a test. --- mwparserfromhell/parser/tokenizer.c | 18 +++++++++++++----- mwparserfromhell/parser/tokenizer.h | 5 +++++ mwparserfromhell/parser/tokenizer.py | 24 +++++++++++++++++++----- tests/tokenizer/templates.mwtest | 21 +++++++++++++++++++++ 4 files changed, 58 insertions(+), 10 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index d3abb22..875263c 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -109,6 +109,8 @@ Tokenizer_push(Tokenizer* self, int context) return -1; top->next = self->topstack; self->topstack = top; + self->depth++; + self->cycles++; return 0; } @@ -174,6 +176,7 @@ Tokenizer_delete_top_of_stack(Tokenizer* self) Textbuffer_dealloc(top->textbuffer); self->topstack = top->next; free(top); + self->depth--; } /* @@ -1269,10 +1272,14 @@ Tokenizer_parse(Tokenizer* self, int context) Tokenizer_write_text(self, this); } else if (this == next && next == *"{") { - if (Tokenizer_parse_template_or_argument(self)) - return NULL; - if (self->topstack->context & LC_FAIL_NEXT) - self->topstack->context ^= LC_FAIL_NEXT; + if (Tokenizer_CAN_RECURSE(self)) { + if 
(Tokenizer_parse_template_or_argument(self)) + return NULL; + if (self->topstack->context & LC_FAIL_NEXT) + self->topstack->context ^= LC_FAIL_NEXT; + } + else + Tokenizer_write_text(self, this); } else if (this == *"|" && this_context & LC_TEMPLATE) { if (Tokenizer_handle_template_param(self)) @@ -1295,7 +1302,8 @@ Tokenizer_parse(Tokenizer* self, int context) Tokenizer_write_text(self, this); } else if (this == next && next == *"[") { - if (!(this_context & LC_WIKILINK_TITLE)) { + if (!(this_context & LC_WIKILINK_TITLE) && + Tokenizer_CAN_RECURSE(self)) { if (Tokenizer_parse_wikilink(self)) return NULL; if (self->topstack->context & LC_FAIL_NEXT) diff --git a/mwparserfromhell/parser/tokenizer.h b/mwparserfromhell/parser/tokenizer.h index 693538c..0730ea8 100644 --- a/mwparserfromhell/parser/tokenizer.h +++ b/mwparserfromhell/parser/tokenizer.h @@ -46,6 +46,8 @@ static const char* MARKERS[] = { #define NUM_MARKERS 18 #define TEXTBUFFER_BLOCKSIZE 1024 +#define MAX_DEPTH 40 +#define MAX_CYCLES 100000 #define MAX_ENTITY_SIZE 8 static int route_state = 0; @@ -165,12 +167,15 @@ typedef struct { Py_ssize_t head; /* current position in text */ Py_ssize_t length; /* length of text */ int global; /* global context */ + int depth; /* stack recursion depth */ + int cycles; /* total number of stack recursions */ } Tokenizer; /* Macros for accessing Tokenizer data: */ #define Tokenizer_READ(self, delta) (*PyUnicode_AS_UNICODE(Tokenizer_read(self, delta))) +#define Tokenizer_CAN_RECURSE(self) (self->depth < MAX_DEPTH && self->cycles < MAX_CYCLES) /* Function prototypes: */ diff --git a/mwparserfromhell/parser/tokenizer.py b/mwparserfromhell/parser/tokenizer.py index f995937..24eb9db 100644 --- a/mwparserfromhell/parser/tokenizer.py +++ b/mwparserfromhell/parser/tokenizer.py @@ -42,6 +42,8 @@ class Tokenizer(object): END = object() MARKERS = ["{", "}", "[", "]", "<", ">", "|", "=", "&", "#", "*", ";", ":", "/", "-", "!", "\n", END] + MAX_DEPTH = 40 + MAX_CYCLES = 100000 regex = 
re.compile(r"([{}\[\]<>|=&#*;:/\-!\n])", flags=re.IGNORECASE) def __init__(self): @@ -49,6 +51,8 @@ class Tokenizer(object): self._head = 0 self._stacks = [] self._global = 0 + self._depth = 0 + self._cycles = 0 @property def _stack(self): @@ -76,6 +80,8 @@ class Tokenizer(object): def _push(self, context=0): """Add a new token stack, context, and textbuffer to the list.""" self._stacks.append([[], context, []]) + self._depth += 1 + self._cycles += 1 def _push_textbuffer(self): """Push the textbuffer onto the stack as a Text node and clear it.""" @@ -90,6 +96,7 @@ class Tokenizer(object): stack's context with the current stack's. """ self._push_textbuffer() + self._depth -= 1 if keep_context: context = self._context stack = self._stacks.pop()[0] @@ -97,6 +104,10 @@ class Tokenizer(object): return stack return self._stacks.pop()[0] + def _can_recurse(self): + """Return whether or not our max recursion depth has been exceeded.""" + return self._depth < self.MAX_DEPTH and self._cycles < self.MAX_CYCLES + def _fail_route(self): """Fail the current tokenization route. 
@@ -418,7 +429,7 @@ class Tokenizer(object): else: if this == "\n": self._context |= contexts.FAIL_ON_TEXT - elif this is not self.END or not this.isspace(): + elif this is self.END or not this.isspace(): self._context |= contexts.HAS_TEXT return True else: @@ -479,9 +490,12 @@ class Tokenizer(object): else: self._write_text(this) elif this == next == "{": - self._parse_template_or_argument() - if self._context & contexts.FAIL_NEXT: - self._context ^= contexts.FAIL_NEXT + if self._can_recurse(): + self._parse_template_or_argument() + if self._context & contexts.FAIL_NEXT: + self._context ^= contexts.FAIL_NEXT + else: + self._write_text("{") elif this == "|" and self._context & contexts.TEMPLATE: self._handle_template_param() elif this == "=" and self._context & contexts.TEMPLATE_PARAM_KEY: @@ -496,7 +510,7 @@ class Tokenizer(object): else: self._write_text("}") elif this == next == "[": - if not self._context & contexts.WIKILINK_TITLE: + if not self._context & contexts.WIKILINK_TITLE and self._can_recurse(): self._parse_wikilink() if self._context & contexts.FAIL_NEXT: self._context ^= contexts.FAIL_NEXT diff --git a/tests/tokenizer/templates.mwtest b/tests/tokenizer/templates.mwtest index fa3c0a4..cf41bb3 100644 --- a/tests/tokenizer/templates.mwtest +++ b/tests/tokenizer/templates.mwtest @@ -481,6 +481,13 @@ output: [TemplateOpen(), Text(text="foo"), TemplateParamSeparator(), Text(text=" --- +name: incomplete_stub +label: incomplete templates that should fail gracefully: just an opening +input: "{{" +output: [Text(text="{{")] + +--- + name: incomplete_plain label: incomplete templates that should fail gracefully: no close whatsoever input: "{{stuff}} {{foobar" @@ -597,3 +604,17 @@ name: incomplete_nested_template_as_param_value label: incomplete templates that should fail gracefully: a valid nested template as a parameter value input: "{{stuff}} {{foo|bar={{baz}}" output: [TemplateOpen(), Text(text="stuff"), TemplateClose(), Text(text=" {{foo|bar="), 
TemplateOpen(), Text(text="baz"), TemplateClose()] + +--- + +name: recursion_one_hundred_opens +label: test potentially dangerous recursion: one hundred template openings +input: "{{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{" +output: [Text(text="{{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{ {{")] + +--- + +name: recursion_opens_and_closes +label: test potentially dangerous recursion: template openings and closings +input: "{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}" +output: [Text(text="{{|"), TemplateOpen(), TemplateClose(), Text(text="{{|"), TemplateOpen(), TemplateClose(), TemplateOpen(), TemplateParamSeparator(), TemplateOpen(), TemplateClose(), Text(text="{{"), TemplateParamSeparator(), Text(text="{{"), TemplateClose(), Text(text="{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}{{|{{}}")] From 786d6192746284ef19c166c4d9eb95050c661b1c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 24 Apr 2013 10:28:17 -0400 Subject: [PATCH 084/115] Drop force_no_field in template.remove(); implement test_remove(). - Also add tests for spacing in param names. 
--- mwparserfromhell/nodes/template.py | 27 ++++++++++-------- tests/test_template.py | 56 ++++++++++++++++++++++++++++++++++---- 2 files changed, 66 insertions(+), 17 deletions(-) diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index eb7f3a8..751c2b1 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -142,9 +142,9 @@ class Template(Node): return False return True - def _remove_without_field(self, param, i, force_no_field): + def _remove_without_field(self, param, i): """Return False if a parameter name should be kept, otherwise True.""" - if not param.showkey and not force_no_field: + if not param.showkey: dependents = [not after.showkey for after in self.params[i+1:]] if any(dependents): return False @@ -266,22 +266,23 @@ class Template(Node): self.params.append(param) return param - def remove(self, name, keep_field=False, force_no_field=False): + def remove(self, name, keep_field=False): """Remove a parameter from the template whose name is *name*. If *keep_field* is ``True``, we will keep the parameter's name, but blank its value. Otherwise, we will remove the parameter completely *unless* other parameters are dependent on it (e.g. removing ``bar`` from ``{{foo|bar|baz}}`` is unsafe because ``{{foo|baz}}`` is not what - we expected, so ``{{foo||baz}}`` will be produced instead), unless - *force_no_field* is also ``True``. If the parameter shows up multiple - times in the template, we will remove all instances of it (and keep - one if *keep_field* is ``True`` - that being the first instance if - none of the instances have dependents, otherwise that instance will be - kept). + we expected, so ``{{foo||baz}}`` will be produced instead). + + If the parameter shows up multiple times in the template, we will + remove all instances of it (and keep one if *keep_field* is ``True`` - + the first instance if none have dependents, otherwise the one with + dependents will be kept). 
""" name = name.strip() if isinstance(name, basestring) else str(name) removed = False + to_remove =[] for i, param in enumerate(self.params): if param.name.strip() == name: if keep_field: @@ -289,13 +290,15 @@ class Template(Node): self._blank_param_value(param.value) keep_field = False else: - self.params.remove(param) + to_remove.append(param) else: - if self._remove_without_field(param, i, force_no_field): - self.params.remove(param) + if self._remove_without_field(param, i): + to_remove.append(param) else: self._blank_param_value(param.value) if not removed: removed = True if not removed: raise ValueError(name) + for param in to_remove: + self.params.remove(param) diff --git a/tests/test_template.py b/tests/test_template.py index fde7522..ecac917 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -98,7 +98,7 @@ class TestTemplate(TreeEqualityTestCase): """test Template.has_param()""" node1 = Template(wrap([Text("foobar")])) node2 = Template(wrap([Text("foo")]), - [pgenh("1", "bar"), pgens("abc", "def")]) + [pgenh("1", "bar"), pgens("\nabc ", "def")]) node3 = Template(wrap([Text("foo")]), [pgenh("1", "a"), pgens("b", "c"), pgens("1", "d")]) node4 = Template(wrap([Text("foo")]), @@ -108,7 +108,7 @@ class TestTemplate(TreeEqualityTestCase): self.assertTrue(node2.has_param("abc")) self.assertFalse(node2.has_param("def")) self.assertTrue(node3.has_param("1")) - self.assertTrue(node3.has_param("b")) + self.assertTrue(node3.has_param(" b ")) self.assertFalse(node4.has_param("b")) self.assertTrue(node3.has_param("b", False)) self.assertTrue(node4.has_param("b", False)) @@ -123,7 +123,7 @@ class TestTemplate(TreeEqualityTestCase): node3p2 = pgens("1", "d") node3 = Template(wrap([Text("foo")]), [pgenh("1", "a"), node3p1, node3p2]) - node4p1 = pgens("b", " ") + node4p1 = pgens(" b", " ") node4 = Template(wrap([Text("foo")]), [pgenh("1", "a"), node4p1]) self.assertRaises(ValueError, node1.get, "foobar") self.assertIs(node2p1, node2.get(1)) @@ -131,10 
+131,56 @@ class TestTemplate(TreeEqualityTestCase): self.assertRaises(ValueError, node2.get, "def") self.assertIs(node3p1, node3.get("b")) self.assertIs(node3p2, node3.get("1")) - self.assertIs(node4p1, node4.get("b")) + self.assertIs(node4p1, node4.get("b ")) # add - # remove + + def test_remove(self): + """test Template.remove()""" + node1 = Template(wrap([Text("foobar")])) + node2 = Template(wrap([Text("foo")]), [pgenh("1", "bar"), + pgens("abc", "def")]) + node3 = Template(wrap([Text("foo")]), [pgenh("1", "bar"), + pgens("abc", "def")]) + node4 = Template(wrap([Text("foo")]), [pgenh("1", "bar"), + pgenh("2", "baz")]) + node5 = Template(wrap([Text("foo")]), [ + pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]) + node6 = Template(wrap([Text("foo")]), [ + pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]) + node7 = Template(wrap([Text("foo")]), [ + pgens("1 ", "a"), pgens(" 1", "b"), pgens("2", "c")]) + node8 = Template(wrap([Text("foo")]), [ + pgens("1 ", "a"), pgens(" 1", "b"), pgens("2", "c")]) + node9 = Template(wrap([Text("foo")]), [ + pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")]) + node10 = Template(wrap([Text("foo")]), [ + pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")]) + + node2.remove("1") + node2.remove("abc") + node3.remove(1, keep_field=True) + node3.remove("abc", keep_field=True) + node4.remove("1", keep_field=False) + node5.remove("a", keep_field=False) + node6.remove("a", keep_field=True) + node7.remove(1, keep_field=True) + node8.remove(1, keep_field=False) + node9.remove(1, keep_field=True) + node10.remove(1, keep_field=False) + + self.assertRaises(ValueError, node1.remove, 1) + self.assertRaises(ValueError, node1.remove, "a") + self.assertRaises(ValueError, node2.remove, "1") + self.assertEquals("{{foo}}", node2) + self.assertEquals("{{foo||abc=}}", node3) + self.assertEquals("{{foo||baz}}", node4) + self.assertEquals("{{foo|b=c}}", node5) + self.assertEquals("{{foo| a=|b=c}}", node6) + self.assertEquals("{{foo|1 =|2=c}}", 
node7) + self.assertEquals("{{foo|2=c}}", node8) + self.assertEquals("{{foo||c}}", node9) + self.assertEquals("{{foo||c}}", node10) if __name__ == "__main__": unittest.main(verbosity=2) From 6af2f3b0639ea515a343cbb36a38daff661f8e62 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 24 Apr 2013 17:46:53 -0400 Subject: [PATCH 085/115] assertEquals -> assertEqual --- tests/test_template.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/test_template.py b/tests/test_template.py index ecac917..0895219 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -172,15 +172,15 @@ class TestTemplate(TreeEqualityTestCase): self.assertRaises(ValueError, node1.remove, 1) self.assertRaises(ValueError, node1.remove, "a") self.assertRaises(ValueError, node2.remove, "1") - self.assertEquals("{{foo}}", node2) - self.assertEquals("{{foo||abc=}}", node3) - self.assertEquals("{{foo||baz}}", node4) - self.assertEquals("{{foo|b=c}}", node5) - self.assertEquals("{{foo| a=|b=c}}", node6) - self.assertEquals("{{foo|1 =|2=c}}", node7) - self.assertEquals("{{foo|2=c}}", node8) - self.assertEquals("{{foo||c}}", node9) - self.assertEquals("{{foo||c}}", node10) + self.assertEqual("{{foo}}", node2) + self.assertEqual("{{foo||abc=}}", node3) + self.assertEqual("{{foo||baz}}", node4) + self.assertEqual("{{foo|b=c}}", node5) + self.assertEqual("{{foo| a=|b=c}}", node6) + self.assertEqual("{{foo|1 =|2=c}}", node7) + self.assertEqual("{{foo|2=c}}", node8) + self.assertEqual("{{foo||c}}", node9) + self.assertEqual("{{foo||c}}", node10) if __name__ == "__main__": unittest.main(verbosity=2) From b46c98b0121d6b9bbb13720a658a3a8b0237932e Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 25 Apr 2013 10:22:20 -0400 Subject: [PATCH 086/115] Clean up template.add(); add a before param but do not implement yet. 
--- mwparserfromhell/nodes/template.py | 47 +++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index 751c2b1..4b74971 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -194,20 +194,30 @@ class Template(Node): return param raise ValueError(name) - def add(self, name, value, showkey=None, force_nonconformity=False): + def add(self, name, value, showkey=None, before=None, + preserve_spacing=True): """Add a parameter to the template with a given *name* and *value*. *name* and *value* can be anything parasable by - :py:func:`.utils.parse_anything`; pipes (and equal signs, if - appropriate) are automatically escaped from *value* where applicable. + :py:func:`.utils.parse_anything`; pipes and equal signs are + automatically escaped from *value* when appropriate. + If *showkey* is given, this will determine whether or not to show the parameter's name (e.g., ``{{foo|bar}}``'s parameter has a name of ``"1"`` but it is hidden); otherwise, we'll make a safe and intelligent guess. If *name* is already a parameter, we'll replace its value while - keeping the same spacing rules unless *force_nonconformity* is - ``True``. We will also try to guess the dominant spacing convention - when adding a new parameter using :py:meth:`_get_spacing_conventions` - unless *force_nonconformity* is ``True``. + keeping the same spacing rules. We will also try to guess the dominant + spacing convention when adding a new parameter using + :py:meth:`_get_spacing_conventions`. + + If *before* is given (either a :py:class:`~.Parameter` object or a + name), then we will place the parameter immediately before this one. + Otherwise, it will be added at the end. This is ignored if the + parameter already exists. 
+ + If *preserve_spacing* is ``False``, we will avoid preserving spacing + conventions when changing the value of an existing parameter or when + adding a new one. """ name, value = parse_anything(name), parse_anything(value) self._surface_escape(value, "|") @@ -220,10 +230,10 @@ class Template(Node): self._surface_escape(value, "=") existing.showkey = showkey nodes = existing.value.nodes - if force_nonconformity: - existing.value = value - else: + if preserve_spacing: existing.value = parse_anything([nodes[0], value, nodes[1]]) + else: + existing.value = value return existing if showkey is None: @@ -245,22 +255,11 @@ class Template(Node): if not showkey: self._surface_escape(value, "=") - if not force_nonconformity: + if preserve_spacing: before_n, after_n = self._get_spacing_conventions(use_names=True) - if before_n and after_n: - name = parse_anything([before_n, name, after_n]) - elif before_n: - name = parse_anything([before_n, name]) - elif after_n: - name = parse_anything([name, after_n]) - before_v, after_v = self._get_spacing_conventions(use_names=False) - if before_v and after_v: - value = parse_anything([before_v, value, after_v]) - elif before_v: - value = parse_anything([before_v, value]) - elif after_v: - value = parse_anything([value, after_v]) + name = parse_anything([before_n, name, after_n]) + value = parse_anything([before_v, value, after_v]) param = Parameter(name, value, showkey) self.params.append(param) From 2ca3b2805e5a346600508e3e622bddad6be38f93 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 26 Apr 2013 10:39:53 -0400 Subject: [PATCH 087/115] Implement 'before' parameter for Template.add() (closes #21) --- mwparserfromhell/nodes/template.py | 11 ++++++++--- tests/test_template.py | 22 +++++++++++++++++++++- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index 4b74971..9d28be4 100644 --- a/mwparserfromhell/nodes/template.py +++ 
b/mwparserfromhell/nodes/template.py @@ -226,9 +226,9 @@ class Template(Node): self.remove(name, keep_field=True) existing = self.get(name) if showkey is not None: - if not showkey: - self._surface_escape(value, "=") existing.showkey = showkey + if not existing.showkey: + self._surface_escape(value, "=") nodes = existing.value.nodes if preserve_spacing: existing.value = parse_anything([nodes[0], value, nodes[1]]) @@ -262,7 +262,12 @@ class Template(Node): value = parse_anything([before_v, value, after_v]) param = Parameter(name, value, showkey) - self.params.append(param) + if before: + if not isinstance(before, Parameter): + before = self.get(before) + self.params.insert(self.params.index(before), param) + else: + self.params.append(param) return param def remove(self, name, keep_field=False): diff --git a/tests/test_template.py b/tests/test_template.py index 0895219..a1661f2 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -133,7 +133,27 @@ class TestTemplate(TreeEqualityTestCase): self.assertIs(node3p2, node3.get("1")) self.assertIs(node4p1, node4.get("b ")) - # add + def test_add(self): + """test Template.add()""" + # add new param with showkey to end + # add new param without showkey to end + # add new param to end with an escapable | + # add new param with showkey to end with an escapable = + # add new param without showkey to end with an escapable = + # add new param with showkey to end preserving spacing (x3) + # add new param without showkey to end not preserving spacing + # add new param guessing showkey where key is to be shown + # add new param guessing showkey where key is to be shown with an escapable = + # add new param guessing showkey where key is not to be shown + # add new param guessing showkey where key is not to be shown with an escapable = + # add existing parameter without modifying showkey + # add existing parameter without modifying showkey with an escapable = + # add existing parameter with modifying showkey + # add 
existing parameter with modifying showkey with an escapable = + # add existing parameter preserving spacing (x3) + # add existing parameter not preserving spacing + # add existing parameter when there are multiple params involved + # add existing parameter when there are multiple params involved; params with dependencies def test_remove(self): """test Template.remove()""" From 81849013bc31b12b1a82a98ff0b4a25ccb597822 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 2 May 2013 11:01:13 -0400 Subject: [PATCH 088/115] Finishing tests for Templates; some fixes. --- mwparserfromhell/nodes/template.py | 21 ++- tests/test_template.py | 262 +++++++++++++++++++++++++++++-------- 2 files changed, 220 insertions(+), 63 deletions(-) diff --git a/mwparserfromhell/nodes/template.py b/mwparserfromhell/nodes/template.py index 9d28be4..3834d41 100644 --- a/mwparserfromhell/nodes/template.py +++ b/mwparserfromhell/nodes/template.py @@ -81,7 +81,7 @@ class Template(Node): in parameter names or values so they are not mistaken for new parameters. """ - replacement = HTMLEntity(value=ord(char)) + replacement = str(HTMLEntity(value=ord(char))) for node in code.filter_text(recursive=False): if char in node: code.replace(node, node.replace(char, replacement)) @@ -107,7 +107,7 @@ class Template(Node): values = tuple(theories.values()) best = max(values) confidence = float(best) / sum(values) - if confidence > 0.75: + if confidence >= 0.75: return tuple(theories.keys())[values.index(best)] def _get_spacing_conventions(self, use_names): @@ -205,15 +205,19 @@ class Template(Node): If *showkey* is given, this will determine whether or not to show the parameter's name (e.g., ``{{foo|bar}}``'s parameter has a name of ``"1"`` but it is hidden); otherwise, we'll make a safe and intelligent - guess. If *name* is already a parameter, we'll replace its value while - keeping the same spacing rules. 
We will also try to guess the dominant - spacing convention when adding a new parameter using + guess. + + If *name* is already a parameter in the template, we'll replace its + value while keeping the same whitespace around it. We will also try to + guess the dominant spacing convention when adding a new parameter using :py:meth:`_get_spacing_conventions`. If *before* is given (either a :py:class:`~.Parameter` object or a name), then we will place the parameter immediately before this one. - Otherwise, it will be added at the end. This is ignored if the - parameter already exists. + Otherwise, it will be added at the end. If *before* is a name and + exists multiple times in the template, we will place it before the last + occurance. If *before* is not in the template, :py:exc:`ValueError` is + raised. The argument is ignored if the new parameter already exists. If *preserve_spacing* is ``False``, we will avoid preserving spacing conventions when changing the value of an existing parameter or when @@ -231,6 +235,9 @@ class Template(Node): self._surface_escape(value, "=") nodes = existing.value.nodes if preserve_spacing: + for i in range(2): # Ignore empty text nodes + if not nodes[i]: + nodes[i] = None existing.value = parse_anything([nodes[0], value, nodes[1]]) else: existing.value = value diff --git a/tests/test_template.py b/tests/test_template.py index a1661f2..3eb88ad 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -24,31 +24,32 @@ from __future__ import unicode_literals import unittest from mwparserfromhell.compat import str -from mwparserfromhell.nodes import Template, Text +from mwparserfromhell.nodes import HTMLEntity, Template, Text from mwparserfromhell.nodes.extras import Parameter from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode from ._test_tree_equality import TreeEqualityTestCase wrap = lambda L: Wikicode(SmartList(L)) -pgens = lambda k, v: Parameter(wrap([Text(k)]), wrap([Text(v)]), 
True) -pgenh = lambda k, v: Parameter(wrap([Text(k)]), wrap([Text(v)]), False) +wraptext = lambda t: wrap([Text(t)]) +pgens = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=True) +pgenh = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=False) class TestTemplate(TreeEqualityTestCase): """Test cases for the Template node.""" def test_unicode(self): """test Template.__unicode__()""" - node = Template(wrap([Text("foobar")])) + node = Template(wraptext("foobar")) self.assertEqual("{{foobar}}", str(node)) - node2 = Template(wrap([Text("foo")]), + node2 = Template(wraptext("foo"), [pgenh("1", "bar"), pgens("abc", "def")]) self.assertEqual("{{foo|bar|abc=def}}", str(node2)) def test_strip(self): """test Template.__strip__()""" - node1 = Template(wrap([Text("foobar")])) - node2 = Template(wrap([Text("foo")]), + node1 = Template(wraptext("foobar")) + node2 = Template(wraptext("foo"), [pgenh("1", "bar"), pgens("abc", "def")]) for a in (True, False): for b in (True, False): @@ -61,8 +62,8 @@ class TestTemplate(TreeEqualityTestCase): getter, marker = object(), object() get = lambda code: output.append((getter, code)) mark = lambda: output.append(marker) - node1 = Template(wrap([Text("foobar")])) - node2 = Template(wrap([Text("foo")]), + node1 = Template(wraptext("foobar")) + node2 = Template(wraptext("foo"), [pgenh("1", "bar"), pgens("abc", "def")]) node1.__showtree__(output.append, get, mark) node2.__showtree__(output.append, get, mark) @@ -76,33 +77,32 @@ class TestTemplate(TreeEqualityTestCase): def test_name(self): """test getter/setter for the name attribute""" - name = wrap([Text("foobar")]) + name = wraptext("foobar") node1 = Template(name) node2 = Template(name, [pgenh("1", "bar")]) self.assertIs(name, node1.name) self.assertIs(name, node2.name) node1.name = "asdf" node2.name = "téstïng" - self.assertWikicodeEqual(wrap([Text("asdf")]), node1.name) - self.assertWikicodeEqual(wrap([Text("téstïng")]), node2.name) + 
self.assertWikicodeEqual(wraptext("asdf"), node1.name) + self.assertWikicodeEqual(wraptext("téstïng"), node2.name) def test_params(self): """test getter for the params attribute""" - node1 = Template(wrap([Text("foobar")])) + node1 = Template(wraptext("foobar")) plist = [pgenh("1", "bar"), pgens("abc", "def")] - node2 = Template(wrap([Text("foo")]), plist) + node2 = Template(wraptext("foo"), plist) self.assertEqual([], node1.params) self.assertIs(plist, node2.params) def test_has_param(self): """test Template.has_param()""" - node1 = Template(wrap([Text("foobar")])) - node2 = Template(wrap([Text("foo")]), + node1 = Template(wraptext("foobar")) + node2 = Template(wraptext("foo"), [pgenh("1", "bar"), pgens("\nabc ", "def")]) - node3 = Template(wrap([Text("foo")]), + node3 = Template(wraptext("foo"), [pgenh("1", "a"), pgens("b", "c"), pgens("1", "d")]) - node4 = Template(wrap([Text("foo")]), - [pgenh("1", "a"), pgens("b", " ")]) + node4 = Template(wraptext("foo"), [pgenh("1", "a"), pgens("b", " ")]) self.assertFalse(node1.has_param("foobar")) self.assertTrue(node2.has_param(1)) self.assertTrue(node2.has_param("abc")) @@ -115,16 +115,15 @@ class TestTemplate(TreeEqualityTestCase): def test_get(self): """test Template.get()""" - node1 = Template(wrap([Text("foobar")])) + node1 = Template(wraptext("foobar")) node2p1 = pgenh("1", "bar") node2p2 = pgens("abc", "def") - node2 = Template(wrap([Text("foo")]), [node2p1, node2p2]) + node2 = Template(wraptext("foo"), [node2p1, node2p2]) node3p1 = pgens("b", "c") node3p2 = pgens("1", "d") - node3 = Template(wrap([Text("foo")]), - [pgenh("1", "a"), node3p1, node3p2]) + node3 = Template(wraptext("foo"), [pgenh("1", "a"), node3p1, node3p2]) node4p1 = pgens(" b", " ") - node4 = Template(wrap([Text("foo")]), [pgenh("1", "a"), node4p1]) + node4 = Template(wraptext("foo"), [pgenh("1", "a"), node4p1]) self.assertRaises(ValueError, node1.get, "foobar") self.assertIs(node2p1, node2.get(1)) self.assertIs(node2p2, node2.get("abc")) @@ 
-135,46 +134,197 @@ class TestTemplate(TreeEqualityTestCase): def test_add(self): """test Template.add()""" - # add new param with showkey to end - # add new param without showkey to end - # add new param to end with an escapable | - # add new param with showkey to end with an escapable = - # add new param without showkey to end with an escapable = - # add new param with showkey to end preserving spacing (x3) - # add new param without showkey to end not preserving spacing - # add new param guessing showkey where key is to be shown - # add new param guessing showkey where key is to be shown with an escapable = - # add new param guessing showkey where key is not to be shown - # add new param guessing showkey where key is not to be shown with an escapable = - # add existing parameter without modifying showkey - # add existing parameter without modifying showkey with an escapable = - # add existing parameter with modifying showkey - # add existing parameter with modifying showkey with an escapable = - # add existing parameter preserving spacing (x3) - # add existing parameter not preserving spacing - # add existing parameter when there are multiple params involved - # add existing parameter when there are multiple params involved; params with dependencies + node1 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) + node2 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) + node3 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) + node4 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) + node5 = Template(wraptext("a"), [pgens("b", "c"), + pgens(" d ", "e")]) + node6 = Template(wraptext("a"), [pgens("b", "c"), pgens("b", "d"), + pgens("b", "e")]) + node7 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) + node8p = pgenh("1", "d") + node8 = Template(wraptext("a"), [pgens("b", "c"), node8p]) + node9 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "d")]) + node10 = Template(wraptext("a"), [pgens("b", "c"), 
pgenh("1", "e")]) + node11 = Template(wraptext("a"), [pgens("b", "c")]) + node12 = Template(wraptext("a"), [pgens("b", "c")]) + node13 = Template(wraptext("a"), [pgens("\nb ", " c"), + pgens("\nd ", " e"), + pgens("\nf ", " g")]) + node14 = Template(wraptext("a\n"), [pgens("b ", "c\n"), + pgens("d ", " e"), + pgens("f ", "g\n"), + pgens("h ", " i\n")]) + node15 = Template(wraptext("a"), [pgens("b ", " c\n"), + pgens("\nd ", " e"), + pgens("\nf ", "g ")]) + node16 = Template(wraptext("a"), [pgens("\nb ", " c"), + pgens("\nd ", " e"), + pgens("\nf ", " g")]) + node17 = Template(wraptext("a"), [pgens("\nb ", " c"), + pgens("\nd ", " e"), + pgens("\nf ", " g")]) + node18 = Template(wraptext("a\n"), [pgens("b ", "c\n"), + pgens("d ", " e"), + pgens("f ", "g\n"), + pgens("h ", " i\n")]) + node19 = Template(wraptext("a"), [pgens("b ", " c\n"), + pgens("\nd ", " e"), + pgens("\nf ", "g ")]) + node20 = Template(wraptext("a"), [pgens("\nb ", " c"), + pgens("\nd ", " e"), + pgens("\nf ", " g")]) + node21 = Template(wraptext("a"), [pgenh("1", "b")]) + node22 = Template(wraptext("a"), [pgenh("1", "b")]) + node23 = Template(wraptext("a"), [pgenh("1", "b")]) + node24 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"), + pgenh("3", "d"), pgenh("4", "e")]) + node25 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"), + pgens("4", "d"), pgens("5", "e")]) + node26 = Template(wraptext("a"), [pgenh("1", "b"), pgenh("2", "c"), + pgens("4", "d"), pgens("5", "e")]) + node27 = Template(wraptext("a"), [pgenh("1", "b")]) + node28 = Template(wraptext("a"), [pgenh("1", "b")]) + node29 = Template(wraptext("a"), [pgens("b", "c")]) + node30 = Template(wraptext("a"), [pgenh("1", "b")]) + node31 = Template(wraptext("a"), [pgenh("1", "b")]) + node32 = Template(wraptext("a"), [pgens("1", "b")]) + node33 = Template(wraptext("a"), [pgens("\nb ", " c"), + pgens("\nd ", " e"), + pgens("\nf ", " g")]) + node34 = Template(wraptext("a\n"), [pgens("b ", "c\n"), + pgens("d ", " e"), + 
pgens("f ", "g\n"), + pgens("h ", " i\n")]) + node35 = Template(wraptext("a"), [pgens("b ", " c\n"), + pgens("\nd ", " e"), + pgens("\nf ", "g ")]) + node36 = Template(wraptext("a"), [pgens("\nb ", " c "), + pgens("\nd ", " e "), + pgens("\nf ", " g ")]) + node37 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"), + pgens("b", "f"), pgens("b", "h"), + pgens("i", "j")]) + node37 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"), + pgens("b", "f"), pgens("b", "h"), + pgens("i", "j")]) + node38 = Template(wraptext("a"), [pgens("1", "b"), pgens("x", "y"), + pgens("1", "c"), pgens("2", "d")]) + node39 = Template(wraptext("a"), [pgens("1", "b"), pgens("x", "y"), + pgenh("1", "c"), pgenh("2", "d")]) + node40 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"), + pgens("f", "g")]) + + node1.add("e", "f", showkey=True) + node2.add(2, "g", showkey=False) + node3.add("e", "foo|bar", showkey=True) + node4.add("e", "f", showkey=True, before="b") + node5.add("f", "g", showkey=True, before=" d ") + node6.add("f", "g", showkey=True, before="b") + self.assertRaises(ValueError, node7.add, "e", "f", showkey=True, + before="q") + node8.add("e", "f", showkey=True, before=node8p) + node9.add("e", "f", showkey=True, before=pgenh("1", "d")) + self.assertRaises(ValueError, node10.add, "e", "f", showkey=True, + before=pgenh("1", "d")) + node11.add("d", "foo=bar", showkey=True) + node12.add("1", "foo=bar", showkey=False) + node13.add("h", "i", showkey=True) + node14.add("j", "k", showkey=True) + node15.add("h", "i", showkey=True) + node16.add("h", "i", showkey=True, preserve_spacing=False) + node17.add("h", "i", showkey=False) + node18.add("j", "k", showkey=False) + node19.add("h", "i", showkey=False) + node20.add("h", "i", showkey=False, preserve_spacing=False) + node21.add("2", "c") + node22.add("3", "c") + node23.add("c", "d") + node24.add("5", "f") + node25.add("3", "f") + node26.add("6", "f") + node27.add("c", "foo=bar") + node28.add("2", "foo=bar") + 
node29.add("b", "d") + node30.add("1", "foo=bar") + node31.add("1", "foo=bar", showkey=True) + node32.add("1", "foo=bar", showkey=False) + node33.add("d", "foo") + node34.add("f", "foo") + node35.add("f", "foo") + node36.add("d", "foo", preserve_spacing=False) + node37.add("b", "k") + node38.add("1", "e") + node39.add("1", "e") + node40.add("d", "h", before="b") + + self.assertEquals("{{a|b=c|d|e=f}}", node1) + self.assertEquals("{{a|b=c|d|g}}", node2) + self.assertEquals("{{a|b=c|d|e=foo|bar}}", node3) + self.assertIsInstance(node3.params[2].value.get(1), HTMLEntity) + self.assertEquals("{{a|e=f|b=c|d}}", node4) + self.assertEquals("{{a|b=c|f=g| d =e}}", node5) + self.assertEquals("{{a|b=c|b=d|f=g|b=e}}", node6) + self.assertEquals("{{a|b=c|d}}", node7) + self.assertEquals("{{a|b=c|e=f|d}}", node8) + self.assertEquals("{{a|b=c|e=f|d}}", node9) + self.assertEquals("{{a|b=c|e}}", node10) + self.assertEquals("{{a|b=c|d=foo=bar}}", node11) + self.assertEquals("{{a|b=c|foo=bar}}", node12) + self.assertIsInstance(node12.params[1].value.get(1), HTMLEntity) + self.assertEquals("{{a|\nb = c|\nd = e|\nf = g|\nh = i}}", node13) + self.assertEquals("{{a\n|b =c\n|d = e|f =g\n|h = i\n|j =k\n}}", node14) + self.assertEquals("{{a|b = c\n|\nd = e|\nf =g |h =i}}", node15) + self.assertEquals("{{a|\nb = c|\nd = e|\nf = g|h=i}}", node16) + self.assertEquals("{{a|\nb = c|\nd = e|\nf = g| i}}", node17) + self.assertEquals("{{a\n|b =c\n|d = e|f =g\n|h = i\n|k\n}}", node18) + self.assertEquals("{{a|b = c\n|\nd = e|\nf =g |i}}", node19) + self.assertEquals("{{a|\nb = c|\nd = e|\nf = g|i}}", node20) + self.assertEquals("{{a|b|c}}", node21) + self.assertEquals("{{a|b|3=c}}", node22) + self.assertEquals("{{a|b|c=d}}", node23) + self.assertEquals("{{a|b|c|d|e|f}}", node24) + self.assertEquals("{{a|b|c|4=d|5=e|f}}", node25) + self.assertEquals("{{a|b|c|4=d|5=e|6=f}}", node26) + self.assertEquals("{{a|b|c=foo=bar}}", node27) + self.assertEquals("{{a|b|foo=bar}}", node28) + 
self.assertIsInstance(node28.params[1].value.get(1), HTMLEntity) + self.assertEquals("{{a|b=d}}", node29) + self.assertEquals("{{a|foo=bar}}", node30) + self.assertIsInstance(node30.params[0].value.get(1), HTMLEntity) + self.assertEquals("{{a|1=foo=bar}}", node31) + self.assertEquals("{{a|foo=bar}}", node32) + self.assertIsInstance(node32.params[0].value.get(1), HTMLEntity) + self.assertEquals("{{a|\nb = c|\nd = foo|\nf = g}}", node33) + self.assertEquals("{{a\n|b =c\n|d = e|f =foo\n|h = i\n}}", node34) + self.assertEquals("{{a|b = c\n|\nd = e|\nf =foo }}", node35) + self.assertEquals("{{a|\nb = c |\nd =foo|\nf = g }}", node36) + self.assertEquals("{{a|b=k|d=e|i=j}}", node37) + self.assertEquals("{{a|1=e|x=y|2=d}}", node38) + self.assertEquals("{{a|x=y|e|d}}", node39) + self.assertEquals("{{a|b=c|d=h|f=g}}", node40) def test_remove(self): """test Template.remove()""" - node1 = Template(wrap([Text("foobar")])) - node2 = Template(wrap([Text("foo")]), [pgenh("1", "bar"), - pgens("abc", "def")]) - node3 = Template(wrap([Text("foo")]), [pgenh("1", "bar"), - pgens("abc", "def")]) - node4 = Template(wrap([Text("foo")]), [pgenh("1", "bar"), - pgenh("2", "baz")]) - node5 = Template(wrap([Text("foo")]), [ + node1 = Template(wraptext("foobar")) + node2 = Template(wraptext("foo"), [pgenh("1", "bar"), + pgens("abc", "def")]) + node3 = Template(wraptext("foo"), [pgenh("1", "bar"), + pgens("abc", "def")]) + node4 = Template(wraptext("foo"), [pgenh("1", "bar"), + pgenh("2", "baz")]) + node5 = Template(wraptext("foo"), [ pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]) - node6 = Template(wrap([Text("foo")]), [ + node6 = Template(wraptext("foo"), [ pgens(" a", "b"), pgens("b", "c"), pgens("a ", "d")]) - node7 = Template(wrap([Text("foo")]), [ + node7 = Template(wraptext("foo"), [ pgens("1 ", "a"), pgens(" 1", "b"), pgens("2", "c")]) - node8 = Template(wrap([Text("foo")]), [ + node8 = Template(wraptext("foo"), [ pgens("1 ", "a"), pgens(" 1", "b"), pgens("2", "c")]) - node9 = 
Template(wrap([Text("foo")]), [ + node9 = Template(wraptext("foo"), [ pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")]) - node10 = Template(wrap([Text("foo")]), [ + node10 = Template(wraptext("foo"), [ pgens("1 ", "a"), pgenh("1", "b"), pgenh("2", "c")]) node2.remove("1") From 1d26c4b312207f956c29c224f34814e486607757 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Thu, 2 May 2013 22:40:35 -0400 Subject: [PATCH 089/115] Why do I always make this mistake? --- tests/test_template.py | 80 +++++++++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/tests/test_template.py b/tests/test_template.py index 3eb88ad..b9fd6e8 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -259,51 +259,51 @@ class TestTemplate(TreeEqualityTestCase): node39.add("1", "e") node40.add("d", "h", before="b") - self.assertEquals("{{a|b=c|d|e=f}}", node1) - self.assertEquals("{{a|b=c|d|g}}", node2) - self.assertEquals("{{a|b=c|d|e=foo|bar}}", node3) + self.assertEqual("{{a|b=c|d|e=f}}", node1) + self.assertEqual("{{a|b=c|d|g}}", node2) + self.assertEqual("{{a|b=c|d|e=foo|bar}}", node3) self.assertIsInstance(node3.params[2].value.get(1), HTMLEntity) - self.assertEquals("{{a|e=f|b=c|d}}", node4) - self.assertEquals("{{a|b=c|f=g| d =e}}", node5) - self.assertEquals("{{a|b=c|b=d|f=g|b=e}}", node6) - self.assertEquals("{{a|b=c|d}}", node7) - self.assertEquals("{{a|b=c|e=f|d}}", node8) - self.assertEquals("{{a|b=c|e=f|d}}", node9) - self.assertEquals("{{a|b=c|e}}", node10) - self.assertEquals("{{a|b=c|d=foo=bar}}", node11) - self.assertEquals("{{a|b=c|foo=bar}}", node12) + self.assertEqual("{{a|e=f|b=c|d}}", node4) + self.assertEqual("{{a|b=c|f=g| d =e}}", node5) + self.assertEqual("{{a|b=c|b=d|f=g|b=e}}", node6) + self.assertEqual("{{a|b=c|d}}", node7) + self.assertEqual("{{a|b=c|e=f|d}}", node8) + self.assertEqual("{{a|b=c|e=f|d}}", node9) + self.assertEqual("{{a|b=c|e}}", node10) + self.assertEqual("{{a|b=c|d=foo=bar}}", 
node11) + self.assertEqual("{{a|b=c|foo=bar}}", node12) self.assertIsInstance(node12.params[1].value.get(1), HTMLEntity) - self.assertEquals("{{a|\nb = c|\nd = e|\nf = g|\nh = i}}", node13) - self.assertEquals("{{a\n|b =c\n|d = e|f =g\n|h = i\n|j =k\n}}", node14) - self.assertEquals("{{a|b = c\n|\nd = e|\nf =g |h =i}}", node15) - self.assertEquals("{{a|\nb = c|\nd = e|\nf = g|h=i}}", node16) - self.assertEquals("{{a|\nb = c|\nd = e|\nf = g| i}}", node17) - self.assertEquals("{{a\n|b =c\n|d = e|f =g\n|h = i\n|k\n}}", node18) - self.assertEquals("{{a|b = c\n|\nd = e|\nf =g |i}}", node19) - self.assertEquals("{{a|\nb = c|\nd = e|\nf = g|i}}", node20) - self.assertEquals("{{a|b|c}}", node21) - self.assertEquals("{{a|b|3=c}}", node22) - self.assertEquals("{{a|b|c=d}}", node23) - self.assertEquals("{{a|b|c|d|e|f}}", node24) - self.assertEquals("{{a|b|c|4=d|5=e|f}}", node25) - self.assertEquals("{{a|b|c|4=d|5=e|6=f}}", node26) - self.assertEquals("{{a|b|c=foo=bar}}", node27) - self.assertEquals("{{a|b|foo=bar}}", node28) + self.assertEqual("{{a|\nb = c|\nd = e|\nf = g|\nh = i}}", node13) + self.assertEqual("{{a\n|b =c\n|d = e|f =g\n|h = i\n|j =k\n}}", node14) + self.assertEqual("{{a|b = c\n|\nd = e|\nf =g |h =i}}", node15) + self.assertEqual("{{a|\nb = c|\nd = e|\nf = g|h=i}}", node16) + self.assertEqual("{{a|\nb = c|\nd = e|\nf = g| i}}", node17) + self.assertEqual("{{a\n|b =c\n|d = e|f =g\n|h = i\n|k\n}}", node18) + self.assertEqual("{{a|b = c\n|\nd = e|\nf =g |i}}", node19) + self.assertEqual("{{a|\nb = c|\nd = e|\nf = g|i}}", node20) + self.assertEqual("{{a|b|c}}", node21) + self.assertEqual("{{a|b|3=c}}", node22) + self.assertEqual("{{a|b|c=d}}", node23) + self.assertEqual("{{a|b|c|d|e|f}}", node24) + self.assertEqual("{{a|b|c|4=d|5=e|f}}", node25) + self.assertEqual("{{a|b|c|4=d|5=e|6=f}}", node26) + self.assertEqual("{{a|b|c=foo=bar}}", node27) + self.assertEqual("{{a|b|foo=bar}}", node28) self.assertIsInstance(node28.params[1].value.get(1), HTMLEntity) - 
self.assertEquals("{{a|b=d}}", node29) - self.assertEquals("{{a|foo=bar}}", node30) + self.assertEqual("{{a|b=d}}", node29) + self.assertEqual("{{a|foo=bar}}", node30) self.assertIsInstance(node30.params[0].value.get(1), HTMLEntity) - self.assertEquals("{{a|1=foo=bar}}", node31) - self.assertEquals("{{a|foo=bar}}", node32) + self.assertEqual("{{a|1=foo=bar}}", node31) + self.assertEqual("{{a|foo=bar}}", node32) self.assertIsInstance(node32.params[0].value.get(1), HTMLEntity) - self.assertEquals("{{a|\nb = c|\nd = foo|\nf = g}}", node33) - self.assertEquals("{{a\n|b =c\n|d = e|f =foo\n|h = i\n}}", node34) - self.assertEquals("{{a|b = c\n|\nd = e|\nf =foo }}", node35) - self.assertEquals("{{a|\nb = c |\nd =foo|\nf = g }}", node36) - self.assertEquals("{{a|b=k|d=e|i=j}}", node37) - self.assertEquals("{{a|1=e|x=y|2=d}}", node38) - self.assertEquals("{{a|x=y|e|d}}", node39) - self.assertEquals("{{a|b=c|d=h|f=g}}", node40) + self.assertEqual("{{a|\nb = c|\nd = foo|\nf = g}}", node33) + self.assertEqual("{{a\n|b =c\n|d = e|f =foo\n|h = i\n}}", node34) + self.assertEqual("{{a|b = c\n|\nd = e|\nf =foo }}", node35) + self.assertEqual("{{a|\nb = c |\nd =foo|\nf = g }}", node36) + self.assertEqual("{{a|b=k|d=e|i=j}}", node37) + self.assertEqual("{{a|1=e|x=y|2=d}}", node38) + self.assertEqual("{{a|x=y|e|d}}", node39) + self.assertEqual("{{a|b=c|d=h|f=g}}", node40) def test_remove(self): """test Template.remove()""" From 3b78541eeb19cf0cb528cd856e8f3048d354fb4e Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 3 May 2013 10:57:30 -0400 Subject: [PATCH 090/115] Clean up indentation. 
--- tests/test_template.py | 66 +++++++++++++++++++++----------------------------- 1 file changed, 27 insertions(+), 39 deletions(-) diff --git a/tests/test_template.py b/tests/test_template.py index b9fd6e8..31ed33b 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -149,32 +149,24 @@ class TestTemplate(TreeEqualityTestCase): node10 = Template(wraptext("a"), [pgens("b", "c"), pgenh("1", "e")]) node11 = Template(wraptext("a"), [pgens("b", "c")]) node12 = Template(wraptext("a"), [pgens("b", "c")]) - node13 = Template(wraptext("a"), [pgens("\nb ", " c"), - pgens("\nd ", " e"), - pgens("\nf ", " g")]) - node14 = Template(wraptext("a\n"), [pgens("b ", "c\n"), - pgens("d ", " e"), - pgens("f ", "g\n"), - pgens("h ", " i\n")]) - node15 = Template(wraptext("a"), [pgens("b ", " c\n"), - pgens("\nd ", " e"), - pgens("\nf ", "g ")]) - node16 = Template(wraptext("a"), [pgens("\nb ", " c"), - pgens("\nd ", " e"), - pgens("\nf ", " g")]) - node17 = Template(wraptext("a"), [pgens("\nb ", " c"), - pgens("\nd ", " e"), - pgens("\nf ", " g")]) - node18 = Template(wraptext("a\n"), [pgens("b ", "c\n"), - pgens("d ", " e"), - pgens("f ", "g\n"), - pgens("h ", " i\n")]) - node19 = Template(wraptext("a"), [pgens("b ", " c\n"), - pgens("\nd ", " e"), - pgens("\nf ", "g ")]) - node20 = Template(wraptext("a"), [pgens("\nb ", " c"), - pgens("\nd ", " e"), - pgens("\nf ", " g")]) + node13 = Template(wraptext("a"), [ + pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) + node14 = Template(wraptext("a\n"), [ + pgens("b ", "c\n"), pgens("d ", " e"), pgens("f ", "g\n"), + pgens("h ", " i\n")]) + node15 = Template(wraptext("a"), [ + pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")]) + node16 = Template(wraptext("a"), [ + pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) + node17 = Template(wraptext("a"), [ + pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) + node18 = Template(wraptext("a\n"), [ + pgens("b ", "c\n"), pgens("d 
", " e"), pgens("f ", "g\n"), + pgens("h ", " i\n")]) + node19 = Template(wraptext("a"), [ + pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")]) + node20 = Template(wraptext("a"), [ + pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) node21 = Template(wraptext("a"), [pgenh("1", "b")]) node22 = Template(wraptext("a"), [pgenh("1", "b")]) node23 = Template(wraptext("a"), [pgenh("1", "b")]) @@ -190,19 +182,15 @@ class TestTemplate(TreeEqualityTestCase): node30 = Template(wraptext("a"), [pgenh("1", "b")]) node31 = Template(wraptext("a"), [pgenh("1", "b")]) node32 = Template(wraptext("a"), [pgens("1", "b")]) - node33 = Template(wraptext("a"), [pgens("\nb ", " c"), - pgens("\nd ", " e"), - pgens("\nf ", " g")]) - node34 = Template(wraptext("a\n"), [pgens("b ", "c\n"), - pgens("d ", " e"), - pgens("f ", "g\n"), - pgens("h ", " i\n")]) - node35 = Template(wraptext("a"), [pgens("b ", " c\n"), - pgens("\nd ", " e"), - pgens("\nf ", "g ")]) - node36 = Template(wraptext("a"), [pgens("\nb ", " c "), - pgens("\nd ", " e "), - pgens("\nf ", " g ")]) + node33 = Template(wraptext("a"), [ + pgens("\nb ", " c"), pgens("\nd ", " e"), pgens("\nf ", " g")]) + node34 = Template(wraptext("a\n"), [ + pgens("b ", "c\n"), pgens("d ", " e"), pgens("f ", "g\n"), + pgens("h ", " i\n")]) + node35 = Template(wraptext("a"), [ + pgens("b ", " c\n"), pgens("\nd ", " e"), pgens("\nf ", "g ")]) + node36 = Template(wraptext("a"), [ + pgens("\nb ", " c "), pgens("\nd ", " e "), pgens("\nf ", " g ")]) node37 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"), pgens("b", "f"), pgens("b", "h"), pgens("i", "j")]) From 7853e207451a69081573624856025f2a3f750f83 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 3 May 2013 23:43:57 -0400 Subject: [PATCH 091/115] Move wrap() and wraptext() TO _test_tree_equality. 
--- tests/_test_tree_equality.py | 4 ++++ tests/test_argument.py | 6 +----- tests/test_builder.py | 6 +----- tests/test_heading.py | 6 +----- tests/test_html_entity.py | 6 +----- tests/test_parameter.py | 6 +----- tests/test_template.py | 6 +----- tests/test_wikilink.py | 6 +----- 8 files changed, 11 insertions(+), 35 deletions(-) diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index 758a72e..a12bd68 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -26,8 +26,12 @@ from unittest import TestCase from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, Tag, Template, Text, Wikilink) from mwparserfromhell.nodes.extras import Attribute, Parameter +from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode +wrap = lambda L: Wikicode(SmartList(L)) +wraptext = lambda t: wrap([Text(t)]) + class TreeEqualityTestCase(TestCase): """A base test case with support for comparing the equality of node trees. 
diff --git a/tests/test_argument.py b/tests/test_argument.py index e0524c4..ae5ae62 100644 --- a/tests/test_argument.py +++ b/tests/test_argument.py @@ -25,12 +25,8 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Argument, Text -from mwparserfromhell.smart_list import SmartList -from mwparserfromhell.wikicode import Wikicode -from ._test_tree_equality import TreeEqualityTestCase - -wrap = lambda L: Wikicode(SmartList(L)) +from ._test_tree_equality import TreeEqualityTestCase, wrap class TestArgument(TreeEqualityTestCase): """Test cases for the Argument node.""" diff --git a/tests/test_builder.py b/tests/test_builder.py index 1e578ed..76917e8 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -28,12 +28,8 @@ from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, from mwparserfromhell.nodes.extras import Attribute, Parameter from mwparserfromhell.parser import tokens from mwparserfromhell.parser.builder import Builder -from mwparserfromhell.smart_list import SmartList -from mwparserfromhell.wikicode import Wikicode -from ._test_tree_equality import TreeEqualityTestCase - -wrap = lambda L: Wikicode(SmartList(L)) +from ._test_tree_equality import TreeEqualityTestCase, wrap class TestBuilder(TreeEqualityTestCase): """Tests for the builder, which turns tokens into Wikicode objects.""" diff --git a/tests/test_heading.py b/tests/test_heading.py index a0e78e5..88603a8 100644 --- a/tests/test_heading.py +++ b/tests/test_heading.py @@ -25,12 +25,8 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Heading, Text -from mwparserfromhell.smart_list import SmartList -from mwparserfromhell.wikicode import Wikicode -from ._test_tree_equality import TreeEqualityTestCase - -wrap = lambda L: Wikicode(SmartList(L)) +from ._test_tree_equality import TreeEqualityTestCase, wrap class TestHeading(TreeEqualityTestCase): """Test cases for the Heading node.""" diff --git 
a/tests/test_html_entity.py b/tests/test_html_entity.py index a7a9669..b6b4394 100644 --- a/tests/test_html_entity.py +++ b/tests/test_html_entity.py @@ -25,12 +25,8 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import HTMLEntity -from mwparserfromhell.smart_list import SmartList -from mwparserfromhell.wikicode import Wikicode -from ._test_tree_equality import TreeEqualityTestCase - -wrap = lambda L: Wikicode(SmartList(L)) +from ._test_tree_equality import TreeEqualityTestCase, wrap class TestHTMLEntity(TreeEqualityTestCase): """Test cases for the HTMLEntity node.""" diff --git a/tests/test_parameter.py b/tests/test_parameter.py index b46ad71..8e85eda 100644 --- a/tests/test_parameter.py +++ b/tests/test_parameter.py @@ -26,12 +26,8 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Text from mwparserfromhell.nodes.extras import Parameter -from mwparserfromhell.smart_list import SmartList -from mwparserfromhell.wikicode import Wikicode -from ._test_tree_equality import TreeEqualityTestCase - -wrap = lambda L: Wikicode(SmartList(L)) +from ._test_tree_equality import TreeEqualityTestCase, wrap class TestParameter(TreeEqualityTestCase): """Test cases for the Parameter node extra.""" diff --git a/tests/test_template.py b/tests/test_template.py index 31ed33b..81b7382 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -26,12 +26,8 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import HTMLEntity, Template, Text from mwparserfromhell.nodes.extras import Parameter -from mwparserfromhell.smart_list import SmartList -from mwparserfromhell.wikicode import Wikicode -from ._test_tree_equality import TreeEqualityTestCase +from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext -wrap = lambda L: Wikicode(SmartList(L)) -wraptext = lambda t: wrap([Text(t)]) pgens = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=True) 
pgenh = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=False) diff --git a/tests/test_wikilink.py b/tests/test_wikilink.py index 422489f..7c02744 100644 --- a/tests/test_wikilink.py +++ b/tests/test_wikilink.py @@ -25,12 +25,8 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Text, Wikilink -from mwparserfromhell.smart_list import SmartList -from mwparserfromhell.wikicode import Wikicode -from ._test_tree_equality import TreeEqualityTestCase - -wrap = lambda L: Wikicode(SmartList(L)) +from ._test_tree_equality import TreeEqualityTestCase, wrap class TestWikilink(TreeEqualityTestCase): """Test cases for the Wikilink node.""" From eea5c774e342752dae016d79782bf755ca48de53 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Fri, 3 May 2013 23:52:10 -0400 Subject: [PATCH 092/115] Clean up some repetitive lines. --- tests/test_argument.py | 13 ++++--------- tests/test_comment.py | 7 +++---- tests/test_heading.py | 7 +++---- tests/test_html_entity.py | 20 +++++++------------- tests/test_text.py | 7 +++---- tests/test_wikilink.py | 13 ++++--------- 6 files changed, 24 insertions(+), 43 deletions(-) diff --git a/tests/test_argument.py b/tests/test_argument.py index ae5ae62..3a959b6 100644 --- a/tests/test_argument.py +++ b/tests/test_argument.py @@ -41,16 +41,11 @@ class TestArgument(TreeEqualityTestCase): def test_strip(self): """test Argument.__strip__()""" node = Argument(wrap([Text("foobar")])) - self.assertIs(None, node.__strip__(True, True)) - self.assertIs(None, node.__strip__(True, False)) - self.assertIs(None, node.__strip__(False, True)) - self.assertIs(None, node.__strip__(False, False)) - node2 = Argument(wrap([Text("foo")]), wrap([Text("bar")])) - self.assertEqual("bar", node2.__strip__(True, True)) - self.assertEqual("bar", node2.__strip__(True, False)) - self.assertEqual("bar", node2.__strip__(False, True)) - self.assertEqual("bar", node2.__strip__(False, False)) + for a in (True, False): + for b in (True, 
False): + self.assertIs(None, node.__strip__(a, b)) + self.assertEqual("bar", node2.__strip__(a, b)) def test_showtree(self): """test Argument.__showtree__()""" diff --git a/tests/test_comment.py b/tests/test_comment.py index 980f594..a7a3c4d 100644 --- a/tests/test_comment.py +++ b/tests/test_comment.py @@ -39,10 +39,9 @@ class TestComment(TreeEqualityTestCase): def test_strip(self): """test Comment.__strip__()""" node = Comment("foobar") - self.assertIs(None, node.__strip__(True, True)) - self.assertIs(None, node.__strip__(True, False)) - self.assertIs(None, node.__strip__(False, True)) - self.assertIs(None, node.__strip__(False, False)) + for a in (True, False): + for b in (True, False): + self.assertIs(None, node.__strip__(a, b)) def test_showtree(self): """test Comment.__showtree__()""" diff --git a/tests/test_heading.py b/tests/test_heading.py index 88603a8..79b0ebf 100644 --- a/tests/test_heading.py +++ b/tests/test_heading.py @@ -41,10 +41,9 @@ class TestHeading(TreeEqualityTestCase): def test_strip(self): """test Heading.__strip__()""" node = Heading(wrap([Text("foobar")]), 3) - self.assertEqual("foobar", node.__strip__(True, True)) - self.assertEqual("foobar", node.__strip__(True, False)) - self.assertEqual("foobar", node.__strip__(False, True)) - self.assertEqual("foobar", node.__strip__(False, False)) + for a in (True, False): + for b in (True, False): + self.assertEqual("foobar", node.__strip__(a, b)) def test_showtree(self): """test Heading.__showtree__()""" diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py index b6b4394..d3d23bf 100644 --- a/tests/test_html_entity.py +++ b/tests/test_html_entity.py @@ -47,19 +47,13 @@ class TestHTMLEntity(TreeEqualityTestCase): node1 = HTMLEntity("nbsp", named=True, hexadecimal=False) node2 = HTMLEntity("107", named=False, hexadecimal=False) node3 = HTMLEntity("e9", named=False, hexadecimal=True) - - self.assertEqual("\xa0", node1.__strip__(True, True)) - self.assertEqual("\xa0", 
node1.__strip__(True, False)) - self.assertEqual(" ", node1.__strip__(False, True)) - self.assertEqual(" ", node1.__strip__(False, False)) - self.assertEqual("k", node2.__strip__(True, True)) - self.assertEqual("k", node2.__strip__(True, False)) - self.assertEqual("k", node2.__strip__(False, True)) - self.assertEqual("k", node2.__strip__(False, False)) - self.assertEqual("é", node3.__strip__(True, True)) - self.assertEqual("é", node3.__strip__(True, False)) - self.assertEqual("é", node3.__strip__(False, True)) - self.assertEqual("é", node3.__strip__(False, False)) + for a in (True, False): + self.assertEqual("\xa0", node1.__strip__(True, a)) + self.assertEqual(" ", node1.__strip__(False, a)) + self.assertEqual("k", node2.__strip__(True, a)) + self.assertEqual("k", node2.__strip__(False, a)) + self.assertEqual("é", node3.__strip__(True, a)) + self.assertEqual("é", node3.__strip__(False, a)) def test_showtree(self): """test HTMLEntity.__showtree__()""" diff --git a/tests/test_text.py b/tests/test_text.py index 13636bf..f3649dd 100644 --- a/tests/test_text.py +++ b/tests/test_text.py @@ -39,10 +39,9 @@ class TestText(unittest.TestCase): def test_strip(self): """test Text.__strip__()""" node = Text("foobar") - self.assertIs(node, node.__strip__(True, True)) - self.assertIs(node, node.__strip__(True, False)) - self.assertIs(node, node.__strip__(False, True)) - self.assertIs(node, node.__strip__(False, False)) + for a in (True, False): + for b in (True, False): + self.assertIs(node, node.__strip__(a, b)) def test_showtree(self): """test Text.__showtree__()""" diff --git a/tests/test_wikilink.py b/tests/test_wikilink.py index 7c02744..09ca5b3 100644 --- a/tests/test_wikilink.py +++ b/tests/test_wikilink.py @@ -41,16 +41,11 @@ class TestWikilink(TreeEqualityTestCase): def test_strip(self): """test Wikilink.__strip__()""" node = Wikilink(wrap([Text("foobar")])) - self.assertEqual("foobar", node.__strip__(True, True)) - self.assertEqual("foobar", node.__strip__(True, False)) 
- self.assertEqual("foobar", node.__strip__(False, True)) - self.assertEqual("foobar", node.__strip__(False, False)) - node2 = Wikilink(wrap([Text("foo")]), wrap([Text("bar")])) - self.assertEqual("bar", node2.__strip__(True, True)) - self.assertEqual("bar", node2.__strip__(True, False)) - self.assertEqual("bar", node2.__strip__(False, True)) - self.assertEqual("bar", node2.__strip__(False, False)) + for a in (True, False): + for b in (True, False): + self.assertEqual("foobar", node.__strip__(a, b)) + self.assertEqual("bar", node2.__strip__(a, b)) def test_showtree(self): """test Wikilink.__showtree__()""" From 06873ee6edcc88b6ee57d5ad57296655f2fb85c8 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 4 May 2013 15:50:48 -0400 Subject: [PATCH 093/115] Add tests for __iternodes__(); add a getnodes() function. --- tests/_test_tree_equality.py | 9 +++++++++ tests/test_argument.py | 19 ++++++++++++++++++- tests/test_comment.py | 7 +++++++ tests/test_heading.py | 12 +++++++++++- tests/test_html_entity.py | 7 +++++++ tests/test_template.py | 26 +++++++++++++++++++++++++- tests/test_text.py | 7 +++++++ tests/test_wikilink.py | 19 ++++++++++++++++++- 8 files changed, 102 insertions(+), 4 deletions(-) diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index a12bd68..6d9b26a 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -32,6 +32,15 @@ from mwparserfromhell.wikicode import Wikicode wrap = lambda L: Wikicode(SmartList(L)) wraptext = lambda t: wrap([Text(t)]) +def getnodes(code): + """Iterate over all child nodes of a given parent node. + + Imitates Wikicode._get_all_nodes(). + """ + for node in code.nodes: + for context, child in node.__iternodes__(getnodes): + yield child + class TreeEqualityTestCase(TestCase): """A base test case with support for comparing the equality of node trees. 
diff --git a/tests/test_argument.py b/tests/test_argument.py index 3a959b6..a9469d4 100644 --- a/tests/test_argument.py +++ b/tests/test_argument.py @@ -26,7 +26,7 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Argument, Text -from ._test_tree_equality import TreeEqualityTestCase, wrap +from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap class TestArgument(TreeEqualityTestCase): """Test cases for the Argument node.""" @@ -38,6 +38,23 @@ class TestArgument(TreeEqualityTestCase): node2 = Argument(wrap([Text("foo")]), wrap([Text("bar")])) self.assertEqual("{{{foo|bar}}}", str(node2)) + def test_iternodes(self): + """test Argument.__iternodes__()""" + node1n1 = Text("foobar") + node2n1, node2n2, node2n3 = Text("foo"), Text("bar"), Text("baz") + node1 = Argument(wrap([node1n1])) + node2 = Argument(wrap([node2n1]), wrap([node2n2, node2n3])) + gen1 = node1.__iternodes__(getnodes) + gen2 = node2.__iternodes__(getnodes) + self.assertEqual((None, node1), next(gen1)) + self.assertEqual((None, node2), next(gen2)) + self.assertEqual((node1.name, node1n1), next(gen1)) + self.assertEqual((node2.name, node2n1), next(gen2)) + self.assertEqual((node2.default, node2n2), next(gen2)) + self.assertEqual((node2.default, node2n3), next(gen2)) + self.assertRaises(StopIteration, next, gen1) + self.assertRaises(StopIteration, next, gen2) + def test_strip(self): """test Argument.__strip__()""" node = Argument(wrap([Text("foobar")])) diff --git a/tests/test_comment.py b/tests/test_comment.py index a7a3c4d..44225a2 100644 --- a/tests/test_comment.py +++ b/tests/test_comment.py @@ -36,6 +36,13 @@ class TestComment(TreeEqualityTestCase): node = Comment("foobar") self.assertEqual("", str(node)) + def test_iternodes(self): + """test Comment.__iternodes__()""" + node = Comment("foobar") + gen = node.__iternodes__(None) + self.assertEqual((None, node), next(gen)) + self.assertRaises(StopIteration, next, gen) + def test_strip(self): 
"""test Comment.__strip__()""" node = Comment("foobar") diff --git a/tests/test_heading.py b/tests/test_heading.py index 79b0ebf..38f6545 100644 --- a/tests/test_heading.py +++ b/tests/test_heading.py @@ -26,7 +26,7 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Heading, Text -from ._test_tree_equality import TreeEqualityTestCase, wrap +from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap class TestHeading(TreeEqualityTestCase): """Test cases for the Heading node.""" @@ -38,6 +38,16 @@ class TestHeading(TreeEqualityTestCase): node2 = Heading(wrap([Text(" zzz ")]), 5) self.assertEqual("===== zzz =====", str(node2)) + def test_iternodes(self): + """test Heading.__iternodes__()""" + text1, text2 = Text("foo"), Text("bar") + node = Heading(wrap([text1, text2]), 3) + gen = node.__iternodes__(getnodes) + self.assertEqual((None, node), next(gen)) + self.assertEqual((node.title, text1), next(gen)) + self.assertEqual((node.title, text2), next(gen)) + self.assertRaises(StopIteration, next, gen) + def test_strip(self): """test Heading.__strip__()""" node = Heading(wrap([Text("foobar")]), 3) diff --git a/tests/test_html_entity.py b/tests/test_html_entity.py index d3d23bf..d38e5ec 100644 --- a/tests/test_html_entity.py +++ b/tests/test_html_entity.py @@ -42,6 +42,13 @@ class TestHTMLEntity(TreeEqualityTestCase): self.assertEqual("k", str(node3)) self.assertEqual("l", str(node4)) + def test_iternodes(self): + """test HTMLEntity.__iternodes__()""" + node = HTMLEntity("nbsp", named=True, hexadecimal=False) + gen = node.__iternodes__(None) + self.assertEqual((None, node), next(gen)) + self.assertRaises(StopIteration, next, gen) + def test_strip(self): """test HTMLEntity.__strip__()""" node1 = HTMLEntity("nbsp", named=True, hexadecimal=False) diff --git a/tests/test_template.py b/tests/test_template.py index 81b7382..28592df 100644 --- a/tests/test_template.py +++ b/tests/test_template.py @@ -26,7 +26,7 @@ import 
unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import HTMLEntity, Template, Text from mwparserfromhell.nodes.extras import Parameter -from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext +from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext pgens = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=True) pgenh = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=False) @@ -42,6 +42,30 @@ class TestTemplate(TreeEqualityTestCase): [pgenh("1", "bar"), pgens("abc", "def")]) self.assertEqual("{{foo|bar|abc=def}}", str(node2)) + def test_iternodes(self): + """test Template.__iternodes__()""" + node1n1 = Text("foobar") + node2n1, node2n2, node2n3 = Text("foo"), Text("bar"), Text("abc") + node2n4, node2n5 = Text("def"), Text("ghi") + node2p1 = Parameter(wraptext("1"), wrap([node2n2]), showkey=False) + node2p2 = Parameter(wrap([node2n3]), wrap([node2n4, node2n5]), + showkey=True) + node1 = Template(wrap([node1n1])) + node2 = Template(wrap([node2n1]), [node2p1, node2p2]) + + gen1 = node1.__iternodes__(getnodes) + gen2 = node2.__iternodes__(getnodes) + self.assertEqual((None, node1), next(gen1)) + self.assertEqual((None, node2), next(gen2)) + self.assertEqual((node1.name, node1n1), next(gen1)) + self.assertEqual((node2.name, node2n1), next(gen2)) + self.assertEqual((node2.params[0].value, node2n2), next(gen2)) + self.assertEqual((node2.params[1].name, node2n3), next(gen2)) + self.assertEqual((node2.params[1].value, node2n4), next(gen2)) + self.assertEqual((node2.params[1].value, node2n5), next(gen2)) + self.assertRaises(StopIteration, next, gen1) + self.assertRaises(StopIteration, next, gen2) + def test_strip(self): """test Template.__strip__()""" node1 = Template(wraptext("foobar")) diff --git a/tests/test_text.py b/tests/test_text.py index f3649dd..35ac340 100644 --- a/tests/test_text.py +++ b/tests/test_text.py @@ -36,6 +36,13 @@ class TestText(unittest.TestCase): node2 = Text("fóóbar") 
self.assertEqual("fóóbar", str(node2)) + def test_iternodes(self): + """test Text.__iternodes__()""" + node = Text("foobar") + gen = node.__iternodes__(None) + self.assertEqual((None, node), next(gen)) + self.assertRaises(StopIteration, next, gen) + def test_strip(self): """test Text.__strip__()""" node = Text("foobar") diff --git a/tests/test_wikilink.py b/tests/test_wikilink.py index 09ca5b3..d4319c1 100644 --- a/tests/test_wikilink.py +++ b/tests/test_wikilink.py @@ -26,7 +26,7 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Text, Wikilink -from ._test_tree_equality import TreeEqualityTestCase, wrap +from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap class TestWikilink(TreeEqualityTestCase): """Test cases for the Wikilink node.""" @@ -38,6 +38,23 @@ class TestWikilink(TreeEqualityTestCase): node2 = Wikilink(wrap([Text("foo")]), wrap([Text("bar")])) self.assertEqual("[[foo|bar]]", str(node2)) + def test_iternodes(self): + """test Wikilink.__iternodes__()""" + node1n1 = Text("foobar") + node2n1, node2n2, node2n3 = Text("foo"), Text("bar"), Text("baz") + node1 = Wikilink(wrap([node1n1])) + node2 = Wikilink(wrap([node2n1]), wrap([node2n2, node2n3])) + gen1 = node1.__iternodes__(getnodes) + gen2 = node2.__iternodes__(getnodes) + self.assertEqual((None, node1), next(gen1)) + self.assertEqual((None, node2), next(gen2)) + self.assertEqual((node1.title, node1n1), next(gen1)) + self.assertEqual((node2.title, node2n1), next(gen2)) + self.assertEqual((node2.text, node2n2), next(gen2)) + self.assertEqual((node2.text, node2n3), next(gen2)) + self.assertRaises(StopIteration, next, gen1) + self.assertRaises(StopIteration, next, gen2) + def test_strip(self): """test Wikilink.__strip__()""" node = Wikilink(wrap([Text("foobar")])) From 3fe629f54188a0f5ffde439811aab656a0418f0c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 4 May 2013 16:18:14 -0400 Subject: [PATCH 094/115] Condense usage of wrap([Text("foo")]) 
to just wraptext("foo"). --- tests/_test_tree_equality.py | 2 +- tests/test_argument.py | 30 ++++++++-------- tests/test_builder.py | 85 +++++++++++++++++++++----------------------- tests/test_heading.py | 20 +++++------ tests/test_parameter.py | 28 +++++++-------- tests/test_parser.py | 21 +++++------ tests/test_utils.py | 27 ++++++-------- tests/test_wikilink.py | 30 ++++++++-------- 8 files changed, 114 insertions(+), 129 deletions(-) diff --git a/tests/_test_tree_equality.py b/tests/_test_tree_equality.py index 6d9b26a..52130ed 100644 --- a/tests/_test_tree_equality.py +++ b/tests/_test_tree_equality.py @@ -30,7 +30,7 @@ from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode wrap = lambda L: Wikicode(SmartList(L)) -wraptext = lambda t: wrap([Text(t)]) +wraptext = lambda *args: wrap([Text(t) for t in args]) def getnodes(code): """Iterate over all child nodes of a given parent node. diff --git a/tests/test_argument.py b/tests/test_argument.py index a9469d4..8191804 100644 --- a/tests/test_argument.py +++ b/tests/test_argument.py @@ -26,16 +26,16 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Argument, Text -from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap +from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext class TestArgument(TreeEqualityTestCase): """Test cases for the Argument node.""" def test_unicode(self): """test Argument.__unicode__()""" - node = Argument(wrap([Text("foobar")])) + node = Argument(wraptext("foobar")) self.assertEqual("{{{foobar}}}", str(node)) - node2 = Argument(wrap([Text("foo")]), wrap([Text("bar")])) + node2 = Argument(wraptext("foo"), wraptext("bar")) self.assertEqual("{{{foo|bar}}}", str(node2)) def test_iternodes(self): @@ -57,8 +57,8 @@ class TestArgument(TreeEqualityTestCase): def test_strip(self): """test Argument.__strip__()""" - node = Argument(wrap([Text("foobar")])) - node2 = 
Argument(wrap([Text("foo")]), wrap([Text("bar")])) + node = Argument(wraptext("foobar")) + node2 = Argument(wraptext("foo"), wraptext("bar")) for a in (True, False): for b in (True, False): self.assertIs(None, node.__strip__(a, b)) @@ -70,8 +70,8 @@ class TestArgument(TreeEqualityTestCase): getter, marker = object(), object() get = lambda code: output.append((getter, code)) mark = lambda: output.append(marker) - node1 = Argument(wrap([Text("foobar")])) - node2 = Argument(wrap([Text("foo")]), wrap([Text("bar")])) + node1 = Argument(wraptext("foobar")) + node2 = Argument(wraptext("foo"), wraptext("bar")) node1.__showtree__(output.append, get, mark) node2.__showtree__(output.append, get, mark) valid = [ @@ -81,26 +81,26 @@ class TestArgument(TreeEqualityTestCase): def test_name(self): """test getter/setter for the name attribute""" - name = wrap([Text("foobar")]) + name = wraptext("foobar") node1 = Argument(name) - node2 = Argument(name, wrap([Text("baz")])) + node2 = Argument(name, wraptext("baz")) self.assertIs(name, node1.name) self.assertIs(name, node2.name) node1.name = "héhehé" node2.name = "héhehé" - self.assertWikicodeEqual(wrap([Text("héhehé")]), node1.name) - self.assertWikicodeEqual(wrap([Text("héhehé")]), node2.name) + self.assertWikicodeEqual(wraptext("héhehé"), node1.name) + self.assertWikicodeEqual(wraptext("héhehé"), node2.name) def test_default(self): """test getter/setter for the default attribute""" - default = wrap([Text("baz")]) - node1 = Argument(wrap([Text("foobar")])) - node2 = Argument(wrap([Text("foobar")]), default) + default = wraptext("baz") + node1 = Argument(wraptext("foobar")) + node2 = Argument(wraptext("foobar"), default) self.assertIs(None, node1.default) self.assertIs(default, node2.default) node1.default = "buzz" node2.default = None - self.assertWikicodeEqual(wrap([Text("buzz")]), node1.default) + self.assertWikicodeEqual(wraptext("buzz"), node1.default) self.assertIs(None, node2.default) if __name__ == "__main__": diff --git 
a/tests/test_builder.py b/tests/test_builder.py index 76917e8..903d144 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -29,7 +29,7 @@ from mwparserfromhell.nodes.extras import Attribute, Parameter from mwparserfromhell.parser import tokens from mwparserfromhell.parser.builder import Builder -from ._test_tree_equality import TreeEqualityTestCase, wrap +from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext class TestBuilder(TreeEqualityTestCase): """Tests for the builder, which turns tokens into Wikicode objects.""" @@ -40,10 +40,10 @@ class TestBuilder(TreeEqualityTestCase): def test_text(self): """tests for building Text nodes""" tests = [ - ([tokens.Text(text="foobar")], wrap([Text("foobar")])), - ([tokens.Text(text="fóóbar")], wrap([Text("fóóbar")])), + ([tokens.Text(text="foobar")], wraptext("foobar")), + ([tokens.Text(text="fóóbar")], wraptext("fóóbar")), ([tokens.Text(text="spam"), tokens.Text(text="eggs")], - wrap([Text("spam"), Text("eggs")])), + wraptext("spam", "eggs")), ] for test, valid in tests: self.assertWikicodeEqual(valid, self.builder.build(test)) @@ -53,25 +53,24 @@ class TestBuilder(TreeEqualityTestCase): tests = [ ([tokens.TemplateOpen(), tokens.Text(text="foobar"), tokens.TemplateClose()], - wrap([Template(wrap([Text("foobar")]))])), + wrap([Template(wraptext("foobar"))])), ([tokens.TemplateOpen(), tokens.Text(text="spam"), tokens.Text(text="eggs"), tokens.TemplateClose()], - wrap([Template(wrap([Text("spam"), Text("eggs")]))])), + wrap([Template(wraptext("spam", "eggs"))])), ([tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateParamSeparator(), tokens.Text(text="bar"), tokens.TemplateClose()], - wrap([Template(wrap([Text("foo")]), params=[ - Parameter(wrap([Text("1")]), wrap([Text("bar")]), - showkey=False)])])), + wrap([Template(wraptext("foo"), params=[ + Parameter(wraptext("1"), wraptext("bar"), showkey=False)])])), ([tokens.TemplateOpen(), tokens.Text(text="foo"), 
tokens.TemplateParamSeparator(), tokens.Text(text="bar"), tokens.TemplateParamEquals(), tokens.Text(text="baz"), tokens.TemplateClose()], - wrap([Template(wrap([Text("foo")]), params=[ - Parameter(wrap([Text("bar")]), wrap([Text("baz")]))])])), + wrap([Template(wraptext("foo"), params=[ + Parameter(wraptext("bar"), wraptext("baz"))])])), ([tokens.TemplateOpen(), tokens.Text(text="foo"), tokens.TemplateParamSeparator(), tokens.Text(text="bar"), @@ -82,14 +81,12 @@ class TestBuilder(TreeEqualityTestCase): tokens.TemplateParamEquals(), tokens.Text(text="buff"), tokens.TemplateParamSeparator(), tokens.Text(text="baff"), tokens.TemplateClose()], - wrap([Template(wrap([Text("foo")]), params=[ - Parameter(wrap([Text("bar")]), wrap([Text("baz")])), - Parameter(wrap([Text("1")]), wrap([Text("biz")]), - showkey=False), - Parameter(wrap([Text("2")]), wrap([Text("buzz")]), - showkey=False), - Parameter(wrap([Text("3")]), wrap([Text("buff")])), - Parameter(wrap([Text("3")]), wrap([Text("baff")]), + wrap([Template(wraptext("foo"), params=[ + Parameter(wraptext("bar"), wraptext("baz")), + Parameter(wraptext("1"), wraptext("biz"), showkey=False), + Parameter(wraptext("2"), wraptext("buzz"), showkey=False), + Parameter(wraptext("3"), wraptext("buff")), + Parameter(wraptext("3"), wraptext("baff"), showkey=False)])])), ] for test, valid in tests: @@ -100,23 +97,22 @@ class TestBuilder(TreeEqualityTestCase): tests = [ ([tokens.ArgumentOpen(), tokens.Text(text="foobar"), tokens.ArgumentClose()], - wrap([Argument(wrap([Text("foobar")]))])), + wrap([Argument(wraptext("foobar"))])), ([tokens.ArgumentOpen(), tokens.Text(text="spam"), tokens.Text(text="eggs"), tokens.ArgumentClose()], - wrap([Argument(wrap([Text("spam"), Text("eggs")]))])), + wrap([Argument(wraptext("spam", "eggs"))])), ([tokens.ArgumentOpen(), tokens.Text(text="foo"), tokens.ArgumentSeparator(), tokens.Text(text="bar"), tokens.ArgumentClose()], - wrap([Argument(wrap([Text("foo")]), wrap([Text("bar")]))])), + 
wrap([Argument(wraptext("foo"), wraptext("bar"))])), ([tokens.ArgumentOpen(), tokens.Text(text="foo"), tokens.Text(text="bar"), tokens.ArgumentSeparator(), tokens.Text(text="baz"), tokens.Text(text="biz"), tokens.ArgumentClose()], - wrap([Argument(wrap([Text("foo"), Text("bar")]), - wrap([Text("baz"), Text("biz")]))])), + wrap([Argument(wraptext("foo", "bar"), wraptext("baz", "biz"))])), ] for test, valid in tests: self.assertWikicodeEqual(valid, self.builder.build(test)) @@ -126,23 +122,22 @@ class TestBuilder(TreeEqualityTestCase): tests = [ ([tokens.WikilinkOpen(), tokens.Text(text="foobar"), tokens.WikilinkClose()], - wrap([Wikilink(wrap([Text("foobar")]))])), + wrap([Wikilink(wraptext("foobar"))])), ([tokens.WikilinkOpen(), tokens.Text(text="spam"), tokens.Text(text="eggs"), tokens.WikilinkClose()], - wrap([Wikilink(wrap([Text("spam"), Text("eggs")]))])), + wrap([Wikilink(wraptext("spam", "eggs"))])), ([tokens.WikilinkOpen(), tokens.Text(text="foo"), tokens.WikilinkSeparator(), tokens.Text(text="bar"), tokens.WikilinkClose()], - wrap([Wikilink(wrap([Text("foo")]), wrap([Text("bar")]))])), + wrap([Wikilink(wraptext("foo"), wraptext("bar"))])), ([tokens.WikilinkOpen(), tokens.Text(text="foo"), tokens.Text(text="bar"), tokens.WikilinkSeparator(), tokens.Text(text="baz"), tokens.Text(text="biz"), tokens.WikilinkClose()], - wrap([Wikilink(wrap([Text("foo"), Text("bar")]), - wrap([Text("baz"), Text("biz")]))])), + wrap([Wikilink(wraptext("foo", "bar"), wraptext("baz", "biz"))])), ] for test, valid in tests: self.assertWikicodeEqual(valid, self.builder.build(test)) @@ -172,11 +167,11 @@ class TestBuilder(TreeEqualityTestCase): tests = [ ([tokens.HeadingStart(level=2), tokens.Text(text="foobar"), tokens.HeadingEnd()], - wrap([Heading(wrap([Text("foobar")]), 2)])), + wrap([Heading(wraptext("foobar"), 2)])), ([tokens.HeadingStart(level=4), tokens.Text(text="spam"), tokens.Text(text="eggs"), tokens.HeadingEnd()], - wrap([Heading(wrap([Text("spam"), Text("eggs")]), 4)])), 
+ wrap([Heading(wraptext("spam", "eggs"), 4)])), ] for test, valid in tests: self.assertWikicodeEqual(valid, self.builder.build(test)) @@ -186,11 +181,11 @@ class TestBuilder(TreeEqualityTestCase): tests = [ ([tokens.CommentStart(), tokens.Text(text="foobar"), tokens.CommentEnd()], - wrap([Comment(wrap([Text("foobar")]))])), + wrap([Comment(wraptext("foobar"))])), ([tokens.CommentStart(), tokens.Text(text="spam"), tokens.Text(text="eggs"), tokens.CommentEnd()], - wrap([Comment(wrap([Text("spam"), Text("eggs")]))])), + wrap([Comment(wraptext("spam", "eggs"))])), ] for test, valid in tests: self.assertWikicodeEqual(valid, self.builder.build(test)) @@ -214,10 +209,10 @@ class TestBuilder(TreeEqualityTestCase): tokens.TemplateOpen(), tokens.Text(text="bin"), tokens.TemplateClose(), tokens.TemplateClose()] valid = wrap( - [Template(wrap([Template(wrap([Template(wrap([Template(wrap([Text( - "foo")])), Text("bar")]), params=[Parameter(wrap([Text("baz")]), - wrap([Text("biz")]))]), Text("buzz")])), Text("usr")]), params=[ - Parameter(wrap([Text("1")]), wrap([Template(wrap([Text("bin")]))]), + [Template(wrap([Template(wrap([Template(wrap([Template(wraptext( + "foo")), Text("bar")]), params=[Parameter(wraptext("baz"), + wraptext("biz"))]), Text("buzz")])), Text("usr")]), params=[ + Parameter(wraptext("1"), wrap([Template(wraptext("bin"))]), showkey=False)])]) self.assertWikicodeEqual(valid, self.builder.build(test)) @@ -243,14 +238,14 @@ class TestBuilder(TreeEqualityTestCase): tokens.Text(text="nbsp"), tokens.HTMLEntityEnd(), tokens.TemplateClose()] valid = wrap( - [Template(wrap([Text("a")]), params=[Parameter(wrap([Text("1")]), - wrap([Text("b")]), showkey=False), Parameter(wrap([Text("2")]), - wrap([Template(wrap([Text("c")]), params=[Parameter(wrap([Text("1") - ]), wrap([Wikilink(wrap([Text("d")])), Argument(wrap([Text("e")]))] - ), showkey=False)])]), showkey=False)]), Wikilink(wrap([Text("f")] - ), wrap([Argument(wrap([Text("g")])), Comment(wrap([Text("h")]))]) - ), 
Template(wrap([Text("i")]), params=[Parameter(wrap([Text("j")]), - wrap([HTMLEntity("nbsp", named=True)]))])]) + [Template(wraptext("a"), params=[Parameter(wraptext("1"), wraptext( + "b"), showkey=False), Parameter(wraptext("2"), wrap([Template( + wraptext("c"), params=[Parameter(wraptext("1"), wrap([Wikilink( + wraptext("d")), Argument(wraptext("e"))]), showkey=False)])]), + showkey=False)]), Wikilink(wraptext("f"), wrap([Argument(wraptext( + "g")), Comment(wraptext("h"))])), Template(wraptext("i"), params=[ + Parameter(wraptext("j"), wrap([HTMLEntity("nbsp", + named=True)]))])]) self.assertWikicodeEqual(valid, self.builder.build(test)) if __name__ == "__main__": diff --git a/tests/test_heading.py b/tests/test_heading.py index 38f6545..7a65872 100644 --- a/tests/test_heading.py +++ b/tests/test_heading.py @@ -26,16 +26,16 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Heading, Text -from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap +from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext class TestHeading(TreeEqualityTestCase): """Test cases for the Heading node.""" def test_unicode(self): """test Heading.__unicode__()""" - node = Heading(wrap([Text("foobar")]), 2) + node = Heading(wraptext("foobar"), 2) self.assertEqual("==foobar==", str(node)) - node2 = Heading(wrap([Text(" zzz ")]), 5) + node2 = Heading(wraptext(" zzz "), 5) self.assertEqual("===== zzz =====", str(node2)) def test_iternodes(self): @@ -50,7 +50,7 @@ class TestHeading(TreeEqualityTestCase): def test_strip(self): """test Heading.__strip__()""" - node = Heading(wrap([Text("foobar")]), 3) + node = Heading(wraptext("foobar"), 3) for a in (True, False): for b in (True, False): self.assertEqual("foobar", node.__strip__(a, b)) @@ -60,8 +60,8 @@ class TestHeading(TreeEqualityTestCase): output = [] getter = object() get = lambda code: output.append((getter, code)) - node1 = Heading(wrap([Text("foobar")]), 3) - node2 
= Heading(wrap([Text(" baz ")]), 4) + node1 = Heading(wraptext("foobar"), 3) + node2 = Heading(wraptext(" baz "), 4) node1.__showtree__(output.append, get, None) node2.__showtree__(output.append, get, None) valid = ["===", (getter, node1.title), "===", @@ -70,20 +70,18 @@ class TestHeading(TreeEqualityTestCase): def test_title(self): """test getter/setter for the title attribute""" - title = wrap([Text("foobar")]) + title = wraptext("foobar") node = Heading(title, 3) self.assertIs(title, node.title) node.title = "héhehé" - self.assertWikicodeEqual(wrap([Text("héhehé")]), node.title) + self.assertWikicodeEqual(wraptext("héhehé"), node.title) def test_level(self): """test getter/setter for the level attribute""" - node = Heading(wrap([Text("foobar")]), 3) + node = Heading(wraptext("foobar"), 3) self.assertEqual(3, node.level) node.level = 5 self.assertEqual(5, node.level) - node.level = True - self.assertEqual(1, node.level) self.assertRaises(ValueError, setattr, node, "level", 0) self.assertRaises(ValueError, setattr, node, "level", 7) self.assertRaises(ValueError, setattr, node, "level", "abc") diff --git a/tests/test_parameter.py b/tests/test_parameter.py index 8e85eda..4786e12 100644 --- a/tests/test_parameter.py +++ b/tests/test_parameter.py @@ -27,43 +27,43 @@ from mwparserfromhell.compat import str from mwparserfromhell.nodes import Text from mwparserfromhell.nodes.extras import Parameter -from ._test_tree_equality import TreeEqualityTestCase, wrap +from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext class TestParameter(TreeEqualityTestCase): """Test cases for the Parameter node extra.""" def test_unicode(self): """test Parameter.__unicode__()""" - node = Parameter(wrap([Text("1")]), wrap([Text("foo")]), showkey=False) + node = Parameter(wraptext("1"), wraptext("foo"), showkey=False) self.assertEqual("foo", str(node)) - node2 = Parameter(wrap([Text("foo")]), wrap([Text("bar")])) + node2 = Parameter(wraptext("foo"), wraptext("bar")) 
self.assertEqual("foo=bar", str(node2)) def test_name(self): """test getter/setter for the name attribute""" - name1 = wrap([Text("1")]) - name2 = wrap([Text("foobar")]) - node1 = Parameter(name1, wrap([Text("foobar")]), showkey=False) - node2 = Parameter(name2, wrap([Text("baz")])) + name1 = wraptext("1") + name2 = wraptext("foobar") + node1 = Parameter(name1, wraptext("foobar"), showkey=False) + node2 = Parameter(name2, wraptext("baz")) self.assertIs(name1, node1.name) self.assertIs(name2, node2.name) node1.name = "héhehé" node2.name = "héhehé" - self.assertWikicodeEqual(wrap([Text("héhehé")]), node1.name) - self.assertWikicodeEqual(wrap([Text("héhehé")]), node2.name) + self.assertWikicodeEqual(wraptext("héhehé"), node1.name) + self.assertWikicodeEqual(wraptext("héhehé"), node2.name) def test_value(self): """test getter/setter for the value attribute""" - value = wrap([Text("bar")]) - node = Parameter(wrap([Text("foo")]), value) + value = wraptext("bar") + node = Parameter(wraptext("foo"), value) self.assertIs(value, node.value) node.value = "héhehé" - self.assertWikicodeEqual(wrap([Text("héhehé")]), node.value) + self.assertWikicodeEqual(wraptext("héhehé"), node.value) def test_showkey(self): """test getter/setter for the showkey attribute""" - node1 = Parameter(wrap([Text("1")]), wrap([Text("foo")]), showkey=False) - node2 = Parameter(wrap([Text("foo")]), wrap([Text("bar")])) + node1 = Parameter(wraptext("1"), wraptext("foo"), showkey=False) + node2 = Parameter(wraptext("foo"), wraptext("bar")) self.assertFalse(node1.showkey) self.assertTrue(node2.showkey) node1.showkey = True diff --git a/tests/test_parser.py b/tests/test_parser.py index 9d2c969..ec5f065 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -26,10 +26,8 @@ import unittest from mwparserfromhell import parser from mwparserfromhell.nodes import Template, Text, Wikilink from mwparserfromhell.nodes.extras import Parameter -from mwparserfromhell.smart_list import SmartList -from 
mwparserfromhell.wikicode import Wikicode -from ._test_tree_equality import TreeEqualityTestCase +from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext from .compat import range class TestParser(TreeEqualityTestCase): @@ -45,18 +43,17 @@ class TestParser(TreeEqualityTestCase): def test_parsing(self): """integration test for parsing overall""" text = "this is text; {{this|is=a|template={{with|[[links]]|in}}it}}" - wrap = lambda L: Wikicode(SmartList(L)) expected = wrap([ Text("this is text; "), - Template(wrap([Text("this")]), [ - Parameter(wrap([Text("is")]), wrap([Text("a")])), - Parameter(wrap([Text("template")]), wrap([ - Template(wrap([Text("with")]), [ - Parameter(wrap([Text("1")]), - wrap([Wikilink(wrap([Text("links")]))]), + Template(wraptext("this"), [ + Parameter(wraptext("is"), wraptext("a")), + Parameter(wraptext("template"), wrap([ + Template(wraptext("with"), [ + Parameter(wraptext("1"), + wrap([Wikilink(wraptext("links"))]), showkey=False), - Parameter(wrap([Text("2")]), - wrap([Text("in")]), showkey=False) + Parameter(wraptext("2"), + wraptext("in"), showkey=False) ]), Text("it") ])) diff --git a/tests/test_utils.py b/tests/test_utils.py index c088530..80a0e5e 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -24,33 +24,28 @@ from __future__ import unicode_literals import unittest from mwparserfromhell.nodes import Template, Text -from mwparserfromhell.smart_list import SmartList from mwparserfromhell.utils import parse_anything -from mwparserfromhell.wikicode import Wikicode -from ._test_tree_equality import TreeEqualityTestCase +from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext class TestUtils(TreeEqualityTestCase): """Tests for the utils module, which provides parse_anything().""" def test_parse_anything_valid(self): """tests for valid input to utils.parse_anything()""" - wrap = lambda L: Wikicode(SmartList(L)) - textify = lambda L: wrap([Text(item) for item in L]) tests = [ - 
(wrap([Text("foobar")]), textify(["foobar"])), - (Template(wrap([Text("spam")])), - wrap([Template(textify(["spam"]))])), - ("fóóbar", textify(["fóóbar"])), - (b"foob\xc3\xa1r", textify(["foobár"])), - (123, textify(["123"])), - (True, textify(["True"])), + (wraptext("foobar"), wraptext("foobar")), + (Template(wraptext("spam")), wrap([Template(wraptext("spam"))])), + ("fóóbar", wraptext("fóóbar")), + (b"foob\xc3\xa1r", wraptext("foobár")), + (123, wraptext("123")), + (True, wraptext("True")), (None, wrap([])), ([Text("foo"), Text("bar"), Text("baz")], - textify(["foo", "bar", "baz"])), - ([wrap([Text("foo")]), Text("bar"), "baz", 123, 456], - textify(["foo", "bar", "baz", "123", "456"])), - ([[[([[((("foo",),),)], "bar"],)]]], textify(["foo", "bar"])) + wraptext("foo", "bar", "baz")), + ([wraptext("foo"), Text("bar"), "baz", 123, 456], + wraptext("foo", "bar", "baz", "123", "456")), + ([[[([[((("foo",),),)], "bar"],)]]], wraptext("foo", "bar")) ] for test, valid in tests: self.assertWikicodeEqual(valid, parse_anything(test)) diff --git a/tests/test_wikilink.py b/tests/test_wikilink.py index d4319c1..7851032 100644 --- a/tests/test_wikilink.py +++ b/tests/test_wikilink.py @@ -26,16 +26,16 @@ import unittest from mwparserfromhell.compat import str from mwparserfromhell.nodes import Text, Wikilink -from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap +from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext class TestWikilink(TreeEqualityTestCase): """Test cases for the Wikilink node.""" def test_unicode(self): """test Wikilink.__unicode__()""" - node = Wikilink(wrap([Text("foobar")])) + node = Wikilink(wraptext("foobar")) self.assertEqual("[[foobar]]", str(node)) - node2 = Wikilink(wrap([Text("foo")]), wrap([Text("bar")])) + node2 = Wikilink(wraptext("foo"), wraptext("bar")) self.assertEqual("[[foo|bar]]", str(node2)) def test_iternodes(self): @@ -57,8 +57,8 @@ class TestWikilink(TreeEqualityTestCase): def test_strip(self): 
"""test Wikilink.__strip__()""" - node = Wikilink(wrap([Text("foobar")])) - node2 = Wikilink(wrap([Text("foo")]), wrap([Text("bar")])) + node = Wikilink(wraptext("foobar")) + node2 = Wikilink(wraptext("foo"), wraptext("bar")) for a in (True, False): for b in (True, False): self.assertEqual("foobar", node.__strip__(a, b)) @@ -70,8 +70,8 @@ class TestWikilink(TreeEqualityTestCase): getter, marker = object(), object() get = lambda code: output.append((getter, code)) mark = lambda: output.append(marker) - node1 = Wikilink(wrap([Text("foobar")])) - node2 = Wikilink(wrap([Text("foo")]), wrap([Text("bar")])) + node1 = Wikilink(wraptext("foobar")) + node2 = Wikilink(wraptext("foo"), wraptext("bar")) node1.__showtree__(output.append, get, mark) node2.__showtree__(output.append, get, mark) valid = [ @@ -81,26 +81,26 @@ class TestWikilink(TreeEqualityTestCase): def test_title(self): """test getter/setter for the title attribute""" - title = wrap([Text("foobar")]) + title = wraptext("foobar") node1 = Wikilink(title) - node2 = Wikilink(title, wrap([Text("baz")])) + node2 = Wikilink(title, wraptext("baz")) self.assertIs(title, node1.title) self.assertIs(title, node2.title) node1.title = "héhehé" node2.title = "héhehé" - self.assertWikicodeEqual(wrap([Text("héhehé")]), node1.title) - self.assertWikicodeEqual(wrap([Text("héhehé")]), node2.title) + self.assertWikicodeEqual(wraptext("héhehé"), node1.title) + self.assertWikicodeEqual(wraptext("héhehé"), node2.title) def test_text(self): """test getter/setter for the text attribute""" - text = wrap([Text("baz")]) - node1 = Wikilink(wrap([Text("foobar")])) - node2 = Wikilink(wrap([Text("foobar")]), text) + text = wraptext("baz") + node1 = Wikilink(wraptext("foobar")) + node2 = Wikilink(wraptext("foobar"), text) self.assertIs(None, node1.text) self.assertIs(text, node2.text) node1.text = "buzz" node2.text = None - self.assertWikicodeEqual(wrap([Text("buzz")]), node1.text) + self.assertWikicodeEqual(wraptext("buzz"), node1.text) 
self.assertIs(None, node2.text) if __name__ == "__main__": From 852c5ff9af1c91aef34b85b10afa59623a117271 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 4 May 2013 21:06:17 -0400 Subject: [PATCH 095/115] Start TestWikicode; make Wikicode.nodes's setter handle more inputs. --- mwparserfromhell/wikicode.py | 2 + tests/test_wikicode.py | 117 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+) create mode 100644 tests/test_wikicode.py diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index f2d9c89..c295fd6 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -162,6 +162,8 @@ class Wikicode(StringMixIn): @nodes.setter def nodes(self, value): + if not isinstance(value, list): + value = parse_anything(value).nodes self._nodes = value def get(self, index): diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py new file mode 100644 index 0000000..421a714 --- /dev/null +++ b/tests/test_wikicode.py @@ -0,0 +1,117 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2012-2013 Ben Kurtovic +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import unicode_literals +import unittest + +from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, + Tag, Template, Text, Wikilink) +from mwparserfromhell.smart_list import SmartList +from mwparserfromhell.wikicode import Wikicode +from mwparserfromhell import parse +from mwparserfromhell.compat import str + +from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext + +class TestWikicode(TreeEqualityTestCase): + """Tests for the Wikicode class, which manages a list of nodes.""" + + def test_unicode(self): + """test Wikicode.__unicode__()""" + code1 = parse("foobar") + code2 = parse("Have a {{template}} and a [[page|link]]") + self.assertEqual("foobar", str(code1)) + self.assertEqual("Have a {{template}} and a [[page|link]]", str(code2)) + + def test_nodes(self): + """test getter/setter for the nodes attribute""" + code = parse("Have a {{template}}") + self.assertEqual(["Have a ", "{{template}}"], code.nodes) + L1 = SmartList([Text("foobar"), Template(wraptext("abc"))]) + L2 = [Text("barfoo"), Template(wraptext("cba"))] + L3 = "abc{{def}}" + code.nodes = L1 + self.assertIs(L1, code.nodes) + code.nodes = L2 + self.assertIs(L2, code.nodes) + code.nodes = L3 + self.assertEqual(["abc", "{{def}}"], code.nodes) + self.assertRaises(ValueError, setattr, code, "nodes", object) + + def test_get(self): + """test Wikicode.get()""" + code = parse("Have a {{template}} and a [[page|link]]") + self.assertIs(code.nodes[0], code.get(0)) + self.assertIs(code.nodes[2], code.get(2)) + self.assertRaises(IndexError, code.get, 4) + + def test_set(self): + """test Wikicode.set()""" + pass + + def test_index(self): + """test Wikicode.index()""" + pass + + def 
test_insert(self): + """test Wikicode.insert()""" + pass + + def test_insert_before(self): + """test Wikicode.insert_before()""" + pass + + def test_insert_after(self): + """test Wikicode.insert_after()""" + pass + + def test_replace(self): + """test Wikicode.replace()""" + pass + + def test_append(self): + """test Wikicode.append()""" + pass + + def test_remove(self): + """test Wikicode.remove()""" + pass + + def test_filter_family(self): + """test the Wikicode.i?filter() family of functions""" + pass + + def test_get_sections(self): + """test Wikicode.get_sections()""" + pass + + def test_strip_code(self): + """test Wikicode.strip_code()""" + pass + + def test_get_tree(self): + """test Wikicode.get_tree()""" + pass + + +if __name__ == "__main__": + unittest.main(verbosity=2) From ee99e6eceb5e77dae0b786422a48893e4255a76c Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 6 May 2013 22:29:02 -0400 Subject: [PATCH 096/115] Fix a bug in Wikicode.set(); implement test_set() and test_index() --- mwparserfromhell/wikicode.py | 3 ++- tests/test_wikicode.py | 27 +++++++++++++++++++++++++-- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index c295fd6..e9bd133 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -184,9 +184,10 @@ class Wikicode(StringMixIn): raise ValueError("Cannot coerce multiple nodes into one index") if index >= len(self.nodes) or -1 * index > len(self.nodes): raise IndexError("List assignment index out of range") - self.nodes.pop(index) if nodes: self.nodes[index] = nodes[0] + else: + self.nodes.pop(index) def index(self, obj, recursive=False): """Return the index of *obj* in the list of nodes. 
diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index 421a714..485ab99 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -66,11 +66,34 @@ class TestWikicode(TreeEqualityTestCase): def test_set(self): """test Wikicode.set()""" - pass + code = parse("Have a {{template}} and a [[page|link]]") + code.set(1, "{{{argument}}}") + self.assertEqual("Have a {{{argument}}} and a [[page|link]]", code) + self.assertIsInstance(code.get(1), Argument) + code.set(2, None) + self.assertEqual("Have a {{{argument}}}[[page|link]]", code) + code.set(-3, "This is an ") + self.assertEqual("This is an {{{argument}}}[[page|link]]", code) + self.assertRaises(ValueError, code.set, 1, "foo {{bar}}") + self.assertRaises(IndexError, code.set, 3, "{{baz}}") + self.assertRaises(IndexError, code.set, -4, "{{baz}}") def test_index(self): """test Wikicode.index()""" - pass + code = parse("Have a {{template}} and a [[page|link]]") + self.assertEqual(0, code.index("Have a ")) + self.assertEqual(3, code.index("[[page|link]]")) + self.assertEqual(1, code.index(code.get(1))) + self.assertRaises(ValueError, code.index, "foo") + + code = parse("{{foo}}{{bar|{{baz}}}}") + self.assertEqual(1, code.index("{{bar|{{baz}}}}")) + self.assertEqual(1, code.index("{{baz}}", recursive=True)) + self.assertEqual(1, code.index(code.get(1).get(1).value, + recursive=True)) + self.assertRaises(ValueError, code.index, "{{baz}}", recursive=False) + self.assertRaises(ValueError, code.index, + code.get(1).get(1).value, recursive=False) def test_insert(self): """test Wikicode.insert()""" From 3095a4203f7e7ca680da861d8b243a2284acfc93 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Wed, 8 May 2013 11:03:04 -0400 Subject: [PATCH 097/115] Finish tests for Wikicode's list-like methods; fix a bug. 
--- mwparserfromhell/wikicode.py | 2 +- tests/test_wikicode.py | 101 ++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 96 insertions(+), 7 deletions(-) diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index e9bd133..b704590 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -68,7 +68,7 @@ class Wikicode(StringMixIn): Raises ``ValueError`` if *obj* is not within *node*. """ for context, child in node.__iternodes__(self._get_all_nodes): - if child is obj: + if self._is_equivalent(obj, child): return context raise ValueError(obj) diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index 485ab99..179d588 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -97,27 +97,116 @@ class TestWikicode(TreeEqualityTestCase): def test_insert(self): """test Wikicode.insert()""" - pass + code = parse("Have a {{template}} and a [[page|link]]") + code.insert(1, "{{{argument}}}") + self.assertEqual( + "Have a {{{argument}}}{{template}} and a [[page|link]]", code) + self.assertIsInstance(code.get(1), Argument) + code.insert(2, None) + self.assertEqual( + "Have a {{{argument}}}{{template}} and a [[page|link]]", code) + code.insert(-3, Text("foo")) + self.assertEqual( + "Have a {{{argument}}}foo{{template}} and a [[page|link]]", code) + + code2 = parse("{{foo}}{{bar}}{{baz}}") + code2.insert(1, "abc{{def}}ghi[[jk]]") + self.assertEqual("{{foo}}abc{{def}}ghi[[jk]]{{bar}}{{baz}}", code2) + self.assertEqual(["{{foo}}", "abc", "{{def}}", "ghi", "[[jk]]", + "{{bar}}", "{{baz}}"], code2.nodes) + + code3 = parse("{{foo}}bar") + code3.insert(1000, "[[baz]]") + code3.insert(-1000, "derp") + self.assertEqual("derp{{foo}}bar[[baz]]", code3) def test_insert_before(self): """test Wikicode.insert_before()""" - pass + code = parse("{{a}}{{b}}{{c}}{{d}}") + code.insert_before("{{b}}", "x", recursive=True) + code.insert_before("{{d}}", "[[y]]", recursive=False) + self.assertEqual("{{a}}x{{b}}{{c}}[[y]]{{d}}", code) + 
code.insert_before(code.get(2), "z") + self.assertEqual("{{a}}xz{{b}}{{c}}[[y]]{{d}}", code) + self.assertRaises(ValueError, code.insert_before, "{{r}}", "n", + recursive=True) + self.assertRaises(ValueError, code.insert_before, "{{r}}", "n", + recursive=False) + + code2 = parse("{{a|{{b}}|{{c|d={{f}}}}}}") + code2.insert_before(code2.get(0).params[0].value.get(0), "x", + recursive=True) + code2.insert_before("{{f}}", "y", recursive=True) + self.assertEqual("{{a|x{{b}}|{{c|d=y{{f}}}}}}", code2) + self.assertRaises(ValueError, code2.insert_before, "{{f}}", "y", + recursive=False) def test_insert_after(self): """test Wikicode.insert_after()""" - pass + code = parse("{{a}}{{b}}{{c}}{{d}}") + code.insert_after("{{b}}", "x", recursive=True) + code.insert_after("{{d}}", "[[y]]", recursive=False) + self.assertEqual("{{a}}{{b}}x{{c}}{{d}}[[y]]", code) + code.insert_after(code.get(2), "z") + self.assertEqual("{{a}}{{b}}xz{{c}}{{d}}[[y]]", code) + self.assertRaises(ValueError, code.insert_after, "{{r}}", "n", + recursive=True) + self.assertRaises(ValueError, code.insert_after, "{{r}}", "n", + recursive=False) + + code2 = parse("{{a|{{b}}|{{c|d={{f}}}}}}") + code2.insert_after(code2.get(0).params[0].value.get(0), "x", + recursive=True) + code2.insert_after("{{f}}", "y", recursive=True) + self.assertEqual("{{a|{{b}}x|{{c|d={{f}}y}}}}", code2) + self.assertRaises(ValueError, code2.insert_after, "{{f}}", "y", + recursive=False) def test_replace(self): """test Wikicode.replace()""" - pass + code = parse("{{a}}{{b}}{{c}}{{d}}") + code.replace("{{b}}", "x", recursive=True) + code.replace("{{d}}", "[[y]]", recursive=False) + self.assertEqual("{{a}}x{{c}}[[y]]", code) + code.replace(code.get(1), "z") + self.assertEqual("{{a}}z{{c}}[[y]]", code) + self.assertRaises(ValueError, code.replace, "{{r}}", "n", + recursive=True) + self.assertRaises(ValueError, code.replace, "{{r}}", "n", + recursive=False) + + code2 = parse("{{a|{{b}}|{{c|d={{f}}}}}}") + 
code2.replace(code2.get(0).params[0].value.get(0), "x", recursive=True) + code2.replace("{{f}}", "y", recursive=True) + self.assertEqual("{{a|x|{{c|d=y}}}}", code2) + self.assertRaises(ValueError, code2.replace, "y", "z", recursive=False) def test_append(self): """test Wikicode.append()""" - pass + code = parse("Have a {{template}}") + code.append("{{{argument}}}") + self.assertEqual("Have a {{template}}{{{argument}}}", code) + self.assertIsInstance(code.get(2), Argument) + code.append(None) + self.assertEqual("Have a {{template}}{{{argument}}}", code) + code.append(Text(" foo")) + self.assertEqual("Have a {{template}}{{{argument}}} foo", code) + self.assertRaises(ValueError, code.append, slice(0, 1)) def test_remove(self): """test Wikicode.remove()""" - pass + code = parse("{{a}}{{b}}{{c}}{{d}}") + code.remove("{{b}}", recursive=True) + code.remove(code.get(1), recursive=True) + self.assertEqual("{{a}}{{d}}", code) + self.assertRaises(ValueError, code.remove, "{{r}}", recursive=True) + self.assertRaises(ValueError, code.remove, "{{r}}", recursive=False) + + code2 = parse("{{a|{{b}}|{{c|d={{f}}{{h}}}}}}") + code2.remove(code2.get(0).params[0].value.get(0), recursive=True) + code2.remove("{{f}}", recursive=True) + self.assertEqual("{{a||{{c|d={{h}}}}}}", code2) + self.assertRaises(ValueError, code2.remove, "{{h}}", recursive=False) def test_filter_family(self): """test the Wikicode.i?filter() family of functions""" From 17ac79e79660e3775e3e06dde254d122515a08da Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 11 May 2013 15:58:45 -0400 Subject: [PATCH 098/115] Build filter methods dynamically. 
--- mwparserfromhell/wikicode.py | 97 ++++++++++++++------------------------------ 1 file changed, 31 insertions(+), 66 deletions(-) diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index b704590..4750094 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -23,7 +23,7 @@ from __future__ import unicode_literals import re -from .compat import maxsize, str +from .compat import maxsize, py3k, str from .nodes import Heading, Node, Tag, Template, Text, Wikilink from .string_mixin import StringMixIn from .utils import parse_anything @@ -291,46 +291,36 @@ class Wikicode(StringMixIn): *flags*. If *forcetype* is given, only nodes that are instances of this type are yielded. """ - if recursive: - nodes = self._get_all_nodes(self) - else: - nodes = self.nodes - for node in nodes: + for node in (self._get_all_nodes(self) if recursive else self.nodes): if not forcetype or isinstance(node, forcetype): if not matches or re.search(matches, str(node), flags): yield node - def ifilter_links(self, recursive=False, matches=None, flags=FLAGS): - """Iterate over wikilink nodes. - - This is equivalent to :py:meth:`ifilter` with *forcetype* set to - :py:class:`~.Wikilink`. - """ - return self.ifilter(recursive, matches, flags, forcetype=Wikilink) - - def ifilter_templates(self, recursive=False, matches=None, flags=FLAGS): - """Iterate over template nodes. - - This is equivalent to :py:meth:`ifilter` with *forcetype* set to - :py:class:`~.Template`. - """ - return self.filter(recursive, matches, flags, forcetype=Template) - - def ifilter_text(self, recursive=False, matches=None, flags=FLAGS): - """Iterate over text nodes. - - This is equivalent to :py:meth:`ifilter` with *forcetype* set to - :py:class:`~.nodes.Text`. + @classmethod + def _build_filter_methods(cls, meths): + """Given a dict of Node types, build corresponding i?filter shortcuts. 
+ + The dict should be given as keys storing the method's base name paired + with values storing the corresponding :py:class:`~.Node` type. For + example, the dict may contain the pair ``("templates", Template)``, + which will produce the methods :py:meth:`ifilter_templates` and + :py:meth:`filter_templates`, which are shortcuts for + :py:meth:`ifilter(forcetype=Template) ` and + :py:meth:`filter(forcetype=Template) `, respectively. These + shortcuts are added to the class itself, with an appropriate docstring. """ - return self.filter(recursive, matches, flags, forcetype=Text) - - def ifilter_tags(self, recursive=False, matches=None, flags=FLAGS): - """Iterate over tag nodes. + doc = """Iterate over {0}. - This is equivalent to :py:meth:`ifilter` with *forcetype* set to - :py:class:`~.Tag`. + This is equivalent to :py:meth:`{1}` with *forcetype* set to + :py:class:`~.{2}`. """ - return self.ifilter(recursive, matches, flags, forcetype=Tag) + for name, forcetype in (meths.items() if py3k else meths.iteritems()): + ifil = lambda self, **kw: self.ifilter(forcetype=forcetype, **kw) + fil = lambda self, **kw: self.filter(forcetype=forcetype, **kw) + ifil.__doc__ = doc.format(name, "ifilter", forcetype) + fil.__doc__ = doc.format(name, "filter", forcetype) + setattr(cls, "ifilter_" + name, ifil) + setattr(cls, "filter_" + name, fil) def filter(self, recursive=False, matches=None, flags=FLAGS, forcetype=None): @@ -340,38 +330,6 @@ class Wikicode(StringMixIn): """ return list(self.ifilter(recursive, matches, flags, forcetype)) - def filter_links(self, recursive=False, matches=None, flags=FLAGS): - """Return a list of wikilink nodes. - - This is equivalent to calling :py:func:`list` on - :py:meth:`ifilter_links`. - """ - return list(self.ifilter_links(recursive, matches, flags)) - - def filter_templates(self, recursive=False, matches=None, flags=FLAGS): - """Return a list of template nodes. 
- - This is equivalent to calling :py:func:`list` on - :py:meth:`ifilter_templates`. - """ - return list(self.ifilter_templates(recursive, matches, flags)) - - def filter_text(self, recursive=False, matches=None, flags=FLAGS): - """Return a list of text nodes. - - This is equivalent to calling :py:func:`list` on - :py:meth:`ifilter_text`. - """ - return list(self.ifilter_text(recursive, matches, flags)) - - def filter_tags(self, recursive=False, matches=None, flags=FLAGS): - """Return a list of tag nodes. - - This is equivalent to calling :py:func:`list` on - :py:meth:`ifilter_tags`. - """ - return list(self.ifilter_tags(recursive, matches, flags)) - def get_sections(self, flat=True, matches=None, levels=None, flags=FLAGS, include_headings=True): """Return a list of sections within the page. @@ -470,3 +428,10 @@ class Wikicode(StringMixIn): """ marker = object() # Random object we can find with certainty in a list return "\n".join(self._get_tree(self, [], marker, 0)) + +Wikicode._build_filter_methods({ + "links": Wikilink, + "templates": Template, + "text": Text, + "tag": Tag + }) From f700914caf895ff7a6ac628797e7a337ee53e4be Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sat, 11 May 2013 19:21:24 -0400 Subject: [PATCH 099/115] Cleanup Wikicode's filter functions; implement test_filter_family(). 
--- mwparserfromhell/wikicode.py | 69 +++++++++++++++++++++++--------------------- tests/test_wikicode.py | 65 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 99 insertions(+), 35 deletions(-) diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index 4750094..365eab7 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -24,7 +24,8 @@ from __future__ import unicode_literals import re from .compat import maxsize, py3k, str -from .nodes import Heading, Node, Tag, Template, Text, Wikilink +from .nodes import (Argument, Comment, Heading, HTMLEntity, Node, Tag, + Template, Text, Wikilink) from .string_mixin import StringMixIn from .utils import parse_anything @@ -151,6 +152,36 @@ class Wikicode(StringMixIn): node.__showtree__(write, get, mark) return lines + @classmethod + def _build_filter_methods(cls, **meths): + """Given Node types, build the corresponding i?filter shortcuts. + + The should be given as keys storing the method's base name paired + with values storing the corresponding :py:class:`~.Node` type. For + example, the dict may contain the pair ``("templates", Template)``, + which will produce the methods :py:meth:`ifilter_templates` and + :py:meth:`filter_templates`, which are shortcuts for + :py:meth:`ifilter(forcetype=Template) ` and + :py:meth:`filter(forcetype=Template) `, respectively. These + shortcuts are added to the class itself, with an appropriate docstring. + """ + doc = """Iterate over {0}. + + This is equivalent to :py:meth:`{1}` with *forcetype* set to + :py:class:`~.{2}`. 
+ """ + make_ifilter = lambda ftype: (lambda self, **kw: + self.ifilter(forcetype=ftype, **kw)) + make_filter = lambda ftype: (lambda self, **kw: + self.filter(forcetype=ftype, **kw)) + for name, ftype in (meths.items() if py3k else meths.iteritems()): + ifilter = make_ifilter(ftype) + filter = make_filter(ftype) + ifilter.__doc__ = doc.format(name, "ifilter", ftype.__name__) + filter.__doc__ = doc.format(name, "filter", ftype.__name__) + setattr(cls, "ifilter_" + name, ifilter) + setattr(cls, "filter_" + name, filter) + @property def nodes(self): """A list of :py:class:`~.Node` objects. @@ -296,32 +327,6 @@ class Wikicode(StringMixIn): if not matches or re.search(matches, str(node), flags): yield node - @classmethod - def _build_filter_methods(cls, meths): - """Given a dict of Node types, build corresponding i?filter shortcuts. - - The dict should be given as keys storing the method's base name paired - with values storing the corresponding :py:class:`~.Node` type. For - example, the dict may contain the pair ``("templates", Template)``, - which will produce the methods :py:meth:`ifilter_templates` and - :py:meth:`filter_templates`, which are shortcuts for - :py:meth:`ifilter(forcetype=Template) ` and - :py:meth:`filter(forcetype=Template) `, respectively. These - shortcuts are added to the class itself, with an appropriate docstring. - """ - doc = """Iterate over {0}. - - This is equivalent to :py:meth:`{1}` with *forcetype* set to - :py:class:`~.{2}`. 
- """ - for name, forcetype in (meths.items() if py3k else meths.iteritems()): - ifil = lambda self, **kw: self.ifilter(forcetype=forcetype, **kw) - fil = lambda self, **kw: self.filter(forcetype=forcetype, **kw) - ifil.__doc__ = doc.format(name, "ifilter", forcetype) - fil.__doc__ = doc.format(name, "filter", forcetype) - setattr(cls, "ifilter_" + name, ifil) - setattr(cls, "filter_" + name, fil) - def filter(self, recursive=False, matches=None, flags=FLAGS, forcetype=None): """Return a list of nodes within our list matching certain conditions. @@ -429,9 +434,7 @@ class Wikicode(StringMixIn): marker = object() # Random object we can find with certainty in a list return "\n".join(self._get_tree(self, [], marker, 0)) -Wikicode._build_filter_methods({ - "links": Wikilink, - "templates": Template, - "text": Text, - "tag": Tag - }) +Wikicode._build_filter_methods( + arguments=Argument, comments=Comment, headings=Heading, + html_entities=HTMLEntity, tags=Tag, templates=Template, text=Text, + wikilinks=Wikilink) diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index 179d588..69600c4 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -21,6 +21,8 @@ # SOFTWARE. 
from __future__ import unicode_literals +import re +from types import GeneratorType import unittest from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, @@ -210,7 +212,67 @@ class TestWikicode(TreeEqualityTestCase): def test_filter_family(self): """test the Wikicode.i?filter() family of functions""" - pass + def genlist(gen): + self.assertIsInstance(gen, GeneratorType) + return list(gen) + ifilter = lambda code: (lambda **kw: genlist(code.ifilter(**kw))) + + code = parse("a{{b}}c[[d]]{{{e}}}{{f}}[[g]]") + for func in (code.filter, ifilter(code)): + self.assertEqual(["a", "{{b}}", "c", "[[d]]", "{{{e}}}", "{{f}}", + "[[g]]"], func()) + self.assertEqual(["{{{e}}}"], func(forcetype=Argument)) + self.assertIs(code.get(4), func(forcetype=Argument)[0]) + self.assertEqual(["a", "c"], func(forcetype=Text)) + self.assertEqual([], func(forcetype=Heading)) + self.assertRaises(TypeError, func, forcetype=True) + + funcs = [ + lambda name, **kw: getattr(code, "filter_" + name)(**kw), + lambda name, **kw: genlist(getattr(code, "ifilter_" + name)(**kw)) + ] + for get_filter in funcs: + self.assertEqual(["{{{e}}}"], get_filter("arguments")) + self.assertIs(code.get(4), get_filter("arguments")[0]) + self.assertEqual([], get_filter("comments")) + self.assertEqual([], get_filter("headings")) + self.assertEqual([], get_filter("html_entities")) + self.assertEqual([], get_filter("tags")) + self.assertEqual(["{{b}}", "{{f}}"], get_filter("templates")) + self.assertEqual(["a", "c"], get_filter("text")) + self.assertEqual(["[[d]]", "[[g]]"], get_filter("wikilinks")) + + code2 = parse("{{a|{{b}}|{{c|d={{f}}{{h}}}}}}") + for func in (code2.filter, ifilter(code2)): + self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}"], + func(recursive=False, forcetype=Template)) + self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}", "{{b}}", + "{{c|d={{f}}{{h}}}}", "{{f}}", "{{h}}"], + func(recursive=True, forcetype=Template)) + + code3 = parse("{{foobar}}{{FOO}}{{baz}}{{bz}}") + for 
func in (code3.filter, ifilter(code3)): + self.assertEqual(["{{foobar}}", "{{FOO}}"], func(matches=r"foo")) + self.assertEqual(["{{foobar}}", "{{FOO}}"], + func(matches=r"^{{foo.*?}}")) + self.assertEqual(["{{foobar}}"], + func(matches=r"^{{foo.*?}}", flags=re.UNICODE)) + self.assertEqual(["{{baz}}", "{{bz}}"], func(matches=r"^{{b.*?z")) + self.assertEqual(["{{baz}}"], func(matches=r"^{{b.+?z}}")) + + self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}"], + code2.filter_templates(recursive=False)) + self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}", "{{b}}", + "{{c|d={{f}}{{h}}}}", "{{f}}", "{{h}}"], + code2.filter_templates(recursive=True)) + self.assertEqual(["{{baz}}", "{{bz}}"], + code3.filter_templates(matches=r"^{{b.*?z")) + self.assertEqual([], code3.filter_tags(matches=r"^{{b.*?z")) + self.assertEqual([], code3.filter_tags(matches=r"^{{b.*?z", flags=0)) + + self.assertRaises(TypeError, code.filter_templates, 100) + self.assertRaises(TypeError, code.filter_templates, a=42) + self.assertRaises(TypeError, code.filter_templates, forcetype=Template) def test_get_sections(self): """test Wikicode.get_sections()""" @@ -224,6 +286,5 @@ class TestWikicode(TreeEqualityTestCase): """test Wikicode.get_tree()""" pass - if __name__ == "__main__": unittest.main(verbosity=2) From 0b56f2e2673339c2a096928168f64666b595ab53 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 13 May 2013 02:39:00 -0400 Subject: [PATCH 100/115] Improve Wikicode.get_sections(); implement test_get_tree(); part of test_get_sections() --- mwparserfromhell/wikicode.py | 32 +++++++++++++++++--------------- tests/test_wikicode.py | 44 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 57 insertions(+), 19 deletions(-) diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index 365eab7..f258921 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -335,34 +335,36 @@ class Wikicode(StringMixIn): """ return list(self.ifilter(recursive, matches, 
flags, forcetype)) - def get_sections(self, flat=True, matches=None, levels=None, flags=FLAGS, - include_headings=True): + def get_sections(self, levels=None, matches=None, flags=FLAGS, + include_lead=True, include_headings=True): """Return a list of sections within the page. Sections are returned as :py:class:`~.Wikicode` objects with a shared node list (implemented using :py:class:`~.SmartList`) so that changes to sections are reflected in the parent Wikicode object. - With *flat* as ``True``, each returned section contains all of its - subsections within the :py:class:`~.Wikicode`; otherwise, the returned - sections contain only the section up to the next heading, regardless of - its size. If *matches* is given, it should be a regex to be matched - against the titles of section headings; only sections whose headings - match the regex will be included. If *levels* is given, it should be a - iterable of integers; only sections whose heading levels are within it - will be returned. If *include_headings* is ``True``, the section's - beginning :py:class:`~.Heading` object will be included in returned - :py:class:`~.Wikicode` objects; otherwise, this is skipped. + Each section contains all of its subsections. If *levels* is given, it + should be a iterable of integers; only sections whose heading levels + are within it will be returned.If *matches* is given, it should be a + regex to be matched against the titles of section headings; only + sections whose headings match the regex will be included. *flags* can + be used to override the default regex flags (see :py:meth:`ifilter`) if + *matches* is used. + + If *include_lead* is ``True``, the first, lead section (without a + heading) will be included in the list. If *include_headings* is + ``True``, the section's beginning :py:class:`~.Heading` object will be + included; otherwise, this is skipped. 
""" if matches: matches = r"^(=+?)\s*" + matches + r"\s*\1$" - headings = self.filter(recursive=True, matches=matches, flags=flags, - forcetype=Heading) + headings = self.filter_headings(recursive=True, matches=matches, + flags=flags) if levels: headings = [head for head in headings if head.level in levels] sections = [] - buffers = [(maxsize, 0)] + buffers = [(maxsize, 0)] if include_lead else [] i = 0 while i < len(self.nodes): if self.nodes[i] in headings: diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index 69600c4..4aa07f1 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -26,11 +26,11 @@ from types import GeneratorType import unittest from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, - Tag, Template, Text, Wikilink) + Node, Tag, Template, Text, Wikilink) from mwparserfromhell.smart_list import SmartList from mwparserfromhell.wikicode import Wikicode from mwparserfromhell import parse -from mwparserfromhell.compat import str +from mwparserfromhell.compat import py3k, str from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext @@ -276,7 +276,37 @@ class TestWikicode(TreeEqualityTestCase): def test_get_sections(self): """test Wikicode.get_sections()""" - pass + page1 = "" + page2 = "==Heading==" + page3 = "===Heading===\nFoo bar baz\n====Gnidaeh====\n" + page4 = """ +This is a lead. +== Section I == +Section I body. {{and a|template}} +=== Section I.A === +Section I.A [[body]]. +=== Section I.B === +==== Section I.B.1 ==== +Section I.B.1 body. + +•Some content. + +== Section II == +Section II body. + +== Section III == +=== Section III.A === +Text. +===== Section III.A.1.a ===== +More text. +==== Section III.A.2 ==== +Even more text. 
+======= section III.A.2.a.i.1 ======= +An invalid section!""" + + self.assertEqual([], parse(page1).get_sections()) + self.assertEqual(["==Heading=="], parse(page2).get_sections()) + self.assertEqual(["===Heading===\nFoo bar baz\n", "====Gnidaeh====\n"], parse(page2).get_sections()) def test_strip_code(self): """test Wikicode.strip_code()""" @@ -284,7 +314,13 @@ class TestWikicode(TreeEqualityTestCase): def test_get_tree(self): """test Wikicode.get_tree()""" - pass + # Since individual nodes have test cases for their __showtree___ + # methods, and the docstring covers all possibilities, this doesn't + # need to test anything other than it: + code = parse("Lorem ipsum {{foo|bar|{{baz}}|spam=eggs}}") + expected = "Lorem ipsum \n{{\n\t foo\n\t| 1\n\t= bar\n\t| 2\n\t= " + \ + "{{\n\t\t\tbaz\n\t }}\n\t| spam\n\t= eggs\n}}" + self.assertEqual(expected.expandtabs(4), code.get_tree()) if __name__ == "__main__": unittest.main(verbosity=2) From 35acc1b812edf46bebcd19c753e170a288c20dc3 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 13 May 2013 18:10:06 -0400 Subject: [PATCH 101/115] Fix a couple bugs. --- mwparserfromhell/wikicode.py | 2 +- tests/test_wikicode.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mwparserfromhell/wikicode.py b/mwparserfromhell/wikicode.py index f258921..1d5de5d 100644 --- a/mwparserfromhell/wikicode.py +++ b/mwparserfromhell/wikicode.py @@ -370,7 +370,7 @@ class Wikicode(StringMixIn): if self.nodes[i] in headings: this = self.nodes[i].level for (level, start) in buffers: - if not flat or this <= level: + if this <= level: buffers.remove((level, start)) sections.append(Wikicode(self.nodes[start:i])) buffers.append((this, i)) diff --git a/tests/test_wikicode.py b/tests/test_wikicode.py index 4aa07f1..1eacb11 100644 --- a/tests/test_wikicode.py +++ b/tests/test_wikicode.py @@ -305,8 +305,8 @@ Even more text. 
An invalid section!""" self.assertEqual([], parse(page1).get_sections()) - self.assertEqual(["==Heading=="], parse(page2).get_sections()) - self.assertEqual(["===Heading===\nFoo bar baz\n", "====Gnidaeh====\n"], parse(page2).get_sections()) + self.assertEqual(["", "==Heading=="], parse(page2).get_sections()) + self.assertEqual(["", "===Heading===\nFoo bar baz\n====Gnidaeh====\n", "====Gnidaeh====\n"], parse(page3).get_sections()) def test_strip_code(self): """test Wikicode.strip_code()""" From 9ede1121ba4caa547a85a9e71eac4171f95eefa3 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Mon, 13 May 2013 18:44:21 -0400 Subject: [PATCH 102/115] Fix tokenizer.c on Windows; add another template test (#25) Mostly by @gdooms, with tweaks. --- mwparserfromhell/parser/tokenizer.c | 47 +++++++++++++++++++++++-------------- mwparserfromhell/parser/tokenizer.h | 1 + tests/tokenizer/templates.mwtest | 9 ++++++- 3 files changed, 39 insertions(+), 18 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index 875263c..1fd4804 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -23,6 +23,11 @@ SOFTWARE. 
#include "tokenizer.h" +double log2(double n) +{ + return log(n) / log(2); +} + static PyObject* Tokenizer_new(PyTypeObject* type, PyObject* args, PyObject* kwds) { @@ -52,8 +57,9 @@ Textbuffer_new(void) static void Tokenizer_dealloc(Tokenizer* self) { - Py_XDECREF(self->text); struct Stack *this = self->topstack, *next; + Py_XDECREF(self->text); + while (this) { Py_DECREF(this->stack); Textbuffer_dealloc(this->textbuffer); @@ -139,20 +145,21 @@ Textbuffer_render(struct Textbuffer* self) static int Tokenizer_push_textbuffer(Tokenizer* self) { + PyObject *text, *kwargs, *token; struct Textbuffer* buffer = self->topstack->textbuffer; if (buffer->size == 0 && !buffer->next) return 0; - PyObject* text = Textbuffer_render(buffer); + text = Textbuffer_render(buffer); if (!text) return -1; - PyObject* kwargs = PyDict_New(); + kwargs = PyDict_New(); if (!kwargs) { Py_DECREF(text); return -1; } PyDict_SetItemString(kwargs, "text", text); Py_DECREF(text); - PyObject* token = PyObject_Call(Text, NOARGS, kwargs); + token = PyObject_Call(Text, NOARGS, kwargs); Py_DECREF(kwargs); if (!token) return -1; @@ -185,9 +192,10 @@ Tokenizer_delete_top_of_stack(Tokenizer* self) static PyObject* Tokenizer_pop(Tokenizer* self) { + PyObject* stack; if (Tokenizer_push_textbuffer(self)) return NULL; - PyObject* stack = self->topstack->stack; + stack = self->topstack->stack; Py_INCREF(stack); Tokenizer_delete_top_of_stack(self); return stack; @@ -200,11 +208,13 @@ Tokenizer_pop(Tokenizer* self) static PyObject* Tokenizer_pop_keeping_context(Tokenizer* self) { + PyObject* stack; + int context; if (Tokenizer_push_textbuffer(self)) return NULL; - PyObject* stack = self->topstack->stack; + stack = self->topstack->stack; Py_INCREF(stack); - int context = self->topstack->context; + context = self->topstack->context; Tokenizer_delete_top_of_stack(self); self->topstack->context = context; return stack; @@ -376,9 +386,10 @@ Tokenizer_read(Tokenizer* self, Py_ssize_t delta) static PyObject* 
Tokenizer_read_backwards(Tokenizer* self, Py_ssize_t delta) { + Py_ssize_t index; if (delta > self->head) return EMPTY; - Py_ssize_t index = self->head - delta; + index = self->head - delta; return PyList_GET_ITEM(self->text, index); } @@ -392,7 +403,7 @@ Tokenizer_parse_template_or_argument(Tokenizer* self) PyObject *tokenlist; self->head += 2; - while (Tokenizer_READ(self, 0) == *"{") { + while (Tokenizer_READ(self, 0) == *"{" && braces < MAX_BRACES) { self->head++; braces++; } @@ -423,8 +434,8 @@ Tokenizer_parse_template_or_argument(Tokenizer* self) if (Tokenizer_parse_template(self)) return -1; if (BAD_ROUTE) { + char text[MAX_BRACES]; RESET_ROUTE(); - char text[braces + 1]; for (i = 0; i < braces; i++) text[i] = *"{"; text[braces] = *""; if (Tokenizer_write_text_then_stack(self, text)) { @@ -635,9 +646,10 @@ Tokenizer_handle_template_end(Tokenizer* self) static int Tokenizer_handle_argument_separator(Tokenizer* self) { + PyObject* token; self->topstack->context ^= LC_ARGUMENT_NAME; self->topstack->context |= LC_ARGUMENT_DEFAULT; - PyObject* token = PyObject_CallObject(ArgumentSeparator, NULL); + token = PyObject_CallObject(ArgumentSeparator, NULL); if (!token) return -1; if (Tokenizer_write(self, token)) { @@ -654,8 +666,8 @@ Tokenizer_handle_argument_separator(Tokenizer* self) static PyObject* Tokenizer_handle_argument_end(Tokenizer* self) { - self->head += 2; PyObject* stack = Tokenizer_pop(self); + self->head += 2; return stack; } @@ -716,9 +728,10 @@ Tokenizer_parse_wikilink(Tokenizer* self) static int Tokenizer_handle_wikilink_separator(Tokenizer* self) { + PyObject* token; self->topstack->context ^= LC_WIKILINK_TITLE; self->topstack->context |= LC_WIKILINK_TEXT; - PyObject* token = PyObject_CallObject(WikilinkSeparator, NULL); + token = PyObject_CallObject(WikilinkSeparator, NULL); if (!token) return -1; if (Tokenizer_write(self, token)) { @@ -735,8 +748,8 @@ Tokenizer_handle_wikilink_separator(Tokenizer* self) static PyObject* 
Tokenizer_handle_wikilink_end(Tokenizer* self) { - self->head += 1; PyObject* stack = Tokenizer_pop(self); + self->head += 1; return stack; } @@ -1093,9 +1106,9 @@ Tokenizer_parse_comment(Tokenizer* self) self->head += 4; comment = Tokenizer_parse(self, LC_COMMENT); if (BAD_ROUTE) { + const char* text = "" +output: [CommentStart(), CommentEnd()] + +--- + +name: basic +label: a basic comment +input: "" +output: [CommentStart(), Text(text=" comment "), CommentEnd()] + +--- + +name: tons_of_nonsense +label: a comment with tons of ignorable garbage in it +input: "" +output: [CommentStart(), Text(text=" foo{{bar}}[[basé\n\n]{}{}{}{}]{{{{{{haha{{--a>aabsp;" +output: [Text(text="&n"), CommentStart(), Text(text="foo"), CommentEnd(), Text(text="bsp;")] + +--- + +name: wildcard +label: a wildcard assortment of various things +input: "{{{{{{{{foo}}bar|baz=biz}}buzz}}usr|{{bin}}}}" +output: [TemplateOpen(), TemplateOpen(), TemplateOpen(), TemplateOpen(), Text(text="foo"), TemplateClose(), Text(text="bar"), TemplateParamSeparator(), Text(text="baz"), TemplateParamEquals(), Text(text="biz"), TemplateClose(), Text(text="buzz"), TemplateClose(), Text(text="usr"), TemplateParamSeparator(), TemplateOpen(), Text(text="bin"), TemplateClose(), TemplateClose()] + +--- + +name: wildcard_redux +label: an even wilder assortment of various things +input: "{{a|b|{{c|[[d]]{{{e}}}}}}}[[f|{{{g}}}]]{{i|j= }}" +output: [TemplateOpen(), Text(text="a"), TemplateParamSeparator(), Text(text="b"), TemplateParamSeparator(), TemplateOpen(), Text(text="c"), TemplateParamSeparator(), WikilinkOpen(), Text(text="d"), WikilinkClose(), ArgumentOpen(), Text(text="e"), ArgumentClose(), TemplateClose(), TemplateClose(), WikilinkOpen(), Text(text="f"), WikilinkSeparator(), ArgumentOpen(), Text(text="g"), ArgumentClose(), CommentStart(), Text(text="h"), CommentEnd(), WikilinkClose(), TemplateOpen(), Text(text="i"), TemplateParamSeparator(), Text(text="j"), TemplateParamEquals(), HTMLEntityStart(), 
Text(text="nbsp"), HTMLEntityEnd(), TemplateClose()] From 22e869b1429dabd30976e4bdb8b819ed240c3f29 Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 19 May 2013 01:45:09 -0400 Subject: [PATCH 115/115] Fix a failing HTML entity test in the C tokenizer. Remove some extraneous whitespace in string_mixin.py. --- mwparserfromhell/parser/tokenizer.c | 19 +++++++++++++++++-- mwparserfromhell/string_mixin.py | 1 - 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/mwparserfromhell/parser/tokenizer.c b/mwparserfromhell/parser/tokenizer.c index df0882e..939f30c 100644 --- a/mwparserfromhell/parser/tokenizer.c +++ b/mwparserfromhell/parser/tokenizer.c @@ -911,8 +911,8 @@ Tokenizer_really_parse_entity(Tokenizer* self) { PyObject *token, *kwargs, *textobj; Py_UNICODE this; - int numeric, hexadecimal, i, j, test; - char *valid, *text, *def; + int numeric, hexadecimal, i, j, zeroes, test; + char *valid, *text, *buffer, *def; #define FAIL_ROUTE_AND_EXIT() { \ Tokenizer_fail_route(self); \ @@ -984,6 +984,7 @@ Tokenizer_really_parse_entity(Tokenizer* self) return -1; } i = 0; + zeroes = 0; while (1) { this = Tokenizer_READ(self, 0); if (this == *";") { @@ -992,6 +993,7 @@ Tokenizer_really_parse_entity(Tokenizer* self) break; } if (i == 0 && this == *"0") { + zeroes++; self->head++; continue; } @@ -1029,6 +1031,19 @@ Tokenizer_really_parse_entity(Tokenizer* self) i++; } } + if (zeroes) { + buffer = calloc(strlen(text) + zeroes + 1, sizeof(char)); + if (!buffer) { + free(text); + PyErr_NoMemory(); + return -1; + } + for (i = 0; i < zeroes; i++) + strcat(buffer, "0"); + strcat(buffer, text); + free(text); + text = buffer; + } textobj = PyUnicode_FromString(text); if (!textobj) { free(text); diff --git a/mwparserfromhell/string_mixin.py b/mwparserfromhell/string_mixin.py index 6bee9c4..89c1bc0 100644 --- a/mwparserfromhell/string_mixin.py +++ b/mwparserfromhell/string_mixin.py @@ -40,7 +40,6 @@ def inheritdoc(method): method.__doc__ = getattr(str, 
method.__name__).__doc__ return method - class StringMixIn(object): """Implement the interface for ``unicode``/``str`` in a dynamic manner.