@@ -1,5 +1,6 @@
 language: python
 python:
+  - "2.6"
   - "2.7"
   - "3.2"
   - "3.3"
@@ -1,3 +1,21 @@
+v0.3.3 (released April 22, 2014):
+
+- Added support for Python 2.6 and 3.4.
+- Template.has() is now passed 'ignore_empty=False' by default instead of True.
+  This fixes a bug when adding parameters to templates with empty fields, and
+  is a breaking change if you rely on the default behavior.
+- The 'matches' argument of Wikicode's filter methods now accepts a function
+  (taking one argument, a Node, and returning a bool) in addition to a regex.
+- Re-added 'flat' argument to Wikicode.get_sections(), fixed the order in which
+  it returns sections, and made it faster.
+- Wikicode.matches() now accepts a tuple or list of strings/Wikicode objects
+  instead of just a single string or Wikicode.
+- Given the frequency of issues with the (admittedly insufficient) tag parser,
+  there's a temporary skip_style_tags argument to parse() that ignores '' and
+  ''' until these issues are corrected.
+- Fixed a parser bug involving nested wikilinks and external links.
+- C code cleanup and speed improvements.
+
 v0.3.2 (released September 1, 2013):
 
 - Added support for Python 3.2 (along with current support for 3.3 and 2.7).
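
A quick sketch of the new matching behavior described above; the wikitext
sample is invented for illustration:

    import mwparserfromhell

    code = mwparserfromhell.parse("{{foo}} and {{bar}}")

    # 'matches' may now be a function taking a Node and returning a bool,
    # rather than only a regex:
    foos = code.filter_templates(matches=lambda node: node.name == "foo")

    # Wikicode.matches() now accepts a tuple or list of candidates:
    assert foos[0].name.matches(("foo", "bah"))
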
@@ -1,4 +1,4 @@
-Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -1,6 +1,32 @@
 Changelog
 =========
 
+v0.3.3
+------
+
+`Released April 22, 2014 <https://github.com/earwig/mwparserfromhell/tree/v0.3.3>`_
+(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.3.2...v0.3.3>`__):
+
+- Added support for Python 2.6 and 3.4.
+- :py:meth:`.Template.has` is now passed *ignore_empty=False* by default
+  instead of *True*. This fixes a bug when adding parameters to templates with
+  empty fields, **and is a breaking change if you rely on the default
+  behavior.**
+- The *matches* argument of :py:class:`Wikicode's <.Wikicode>`
+  :py:meth:`.filter` methods now accepts a function (taking one argument, a
+  :py:class:`.Node`, and returning a bool) in addition to a regex.
+- Re-added *flat* argument to :py:meth:`.Wikicode.get_sections`, fixed the
+  order in which it returns sections, and made it faster.
+- :py:meth:`.Wikicode.matches` now accepts a tuple or list of
+  strings/:py:class:`.Wikicode` objects instead of just a single string or
+  :py:class:`.Wikicode`.
+- Given the frequency of issues with the (admittedly insufficient) tag parser,
+  there's a temporary *skip_style_tags* argument to
+  :py:meth:`~.Parser.parse` that ignores ``''`` and ``'''`` until these issues
+  are corrected.
+- Fixed a parser bug involving nested wikilinks and external links.
+- C code cleanup and speed improvements.
+
 v0.3.2
 ------
@@ -42,7 +42,7 @@ master_doc = 'index'
 
 # General information about the project.
 project = u'mwparserfromhell'
-copyright = u'2012, 2013 Ben Kurtovic'
+copyright = u'2012, 2013, 2014 Ben Kurtovic'
 
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -29,10 +29,10 @@ outrageously powerful parser for `MediaWiki <http://mediawiki.org>`_ wikicode.
 from __future__ import unicode_literals
 
 __author__ = "Ben Kurtovic"
-__copyright__ = "Copyright (C) 2012, 2013 Ben Kurtovic"
+__copyright__ = "Copyright (C) 2012, 2013, 2014 Ben Kurtovic"
 __license__ = "MIT License"
-__version__ = "0.3.2"
-__email__ = "ben.kurtovic@verizon.net"
+__version__ = "0.3.3"
+__email__ = "ben.kurtovic@gmail.com"
 
 from . import (compat, definitions, nodes, parser, smart_list, string_mixin,
                utils, wikicode)
@@ -10,18 +10,21 @@ types are meant to be imported directly from within the parser's modules.
 import sys
 
-py3k = sys.version_info.major == 3
-py32 = py3k and sys.version_info.minor == 2
+py26 = (sys.version_info[0] == 2) and (sys.version_info[1] == 6)
+py3k = (sys.version_info[0] == 3)
+py32 = py3k and (sys.version_info[1] == 2)
 
 if py3k:
     bytes = bytes
    str = str
+    range = range
     maxsize = sys.maxsize
     import html.entities as htmlentities
 
 else:
     bytes = str
     str = unicode
+    range = xrange
     maxsize = sys.maxint
     import htmlentitydefs as htmlentities
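
A sketch of how these shims are meant to be consumed elsewhere in the
codebase (the loop itself is invented for illustration):

    from mwparserfromhell.compat import py3k, range, str

    # compat.range is xrange on Python 2 and the builtin range on Python 3,
    # so callers get lazy iteration under either interpreter:
    for i in range(3):
        print(str(i))
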
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -42,21 +42,21 @@ class Node(StringMixIn):
     :py:meth:`__unicode__` must be overridden. It should return a ``unicode``
     or (``str`` in py3k) representation of the node. If the node contains
-    :py:class:`~.Wikicode` objects inside of it, :py:meth:`__iternodes__`
-    should be overridden to yield tuples of (``wikicode``,
-    ``node_in_wikicode``) for each node in each wikicode, as well as the node
-    itself (``None``, ``self``). If the node is printable, :py:meth:`__strip__`
-    should be overridden to return the printable version of the node - it does
-    not have to be a string, but something that can be converted to a string
-    with ``str()``. Finally, :py:meth:`__showtree__` can be overridden to build
-    a nice tree representation of the node, if desired, for
+    :py:class:`~.Wikicode` objects inside of it, :py:meth:`__children__`
+    should be a generator that iterates over them. If the node is printable
+    (shown when the page is rendered), :py:meth:`__strip__` should return its
+    printable version, stripping out any formatting marks. It does not have to
+    return a string, but something that can be converted to a string with
+    ``str()``. Finally, :py:meth:`__showtree__` can be overridden to build a
+    nice tree representation of the node, if desired, for
     :py:meth:`~.Wikicode.get_tree`.
     """
 
     def __unicode__(self):
         raise NotImplementedError()
 
-    def __iternodes__(self, getter):
-        yield None, self
+    def __children__(self):
+        return  # Funny generator-that-yields-nothing syntax
+        yield
 
     def __strip__(self, normalize, collapse):
         return None
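
A hypothetical subclass showing the new protocol; the CustomNode class and
its fields are invented for illustration:

    from mwparserfromhell.nodes import Node

    class CustomNode(Node):
        """Toy node holding a required and an optional Wikicode field."""

        def __init__(self, name, value=None):
            self.name = name    # a Wikicode object
            self.value = value  # a Wikicode object, or None

        def __unicode__(self):
            return str(self.name)

        def __children__(self):
            # Yield each contained Wikicode directly; the old __iternodes__
            # protocol yielded (wikicode, node) tuples instead.
            yield self.name
            if self.value is not None:
                yield self.value
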
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -42,13 +42,10 @@ class Argument(Node):
             return start + "|" + str(self.default) + "}}}"
         return start + "}}}"
 
-    def __iternodes__(self, getter):
-        yield None, self
-        for child in getter(self.name):
-            yield self.name, child
+    def __children__(self):
+        yield self.name
         if self.default is not None:
-            for child in getter(self.default):
-                yield self.default, child
+            yield self.default
 
     def __strip__(self, normalize, collapse):
         if self.default is not None:
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -44,13 +44,10 @@ class ExternalLink(Node):
             return "[" + str(self.url) + "]"
         return str(self.url)
 
-    def __iternodes__(self, getter):
-        yield None, self
-        for child in getter(self.url):
-            yield self.url, child
+    def __children__(self):
+        yield self.url
         if self.title is not None:
-            for child in getter(self.title):
-                yield self.title, child
+            yield self.title
 
     def __strip__(self, normalize, collapse):
         if self.brackets:
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -39,10 +39,8 @@ class Heading(Node):
     def __unicode__(self):
         return ("=" * self.level) + str(self.title) + ("=" * self.level)
 
-    def __iternodes__(self, getter):
-        yield None, self
-        for child in getter(self.title):
-            yield self.title, child
+    def __children__(self):
+        yield self.title
 
     def __strip__(self, normalize, collapse):
         return self.title.strip_code(normalize, collapse)
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -70,23 +70,17 @@ class Tag(Node):
             result += "</" + str(self.closing_tag) + ">"
         return result
 
-    def __iternodes__(self, getter):
-        yield None, self
+    def __children__(self):
         if not self.wiki_markup:
-            for child in getter(self.tag):
-                yield self.tag, child
+            yield self.tag
         for attr in self.attributes:
-            for child in getter(attr.name):
-                yield attr.name, child
-            if attr.value:
-                for child in getter(attr.value):
-                    yield attr.value, child
+            yield attr.name
+            if attr.value is not None:
+                yield attr.value
         if self.contents:
-            for child in getter(self.contents):
-                yield self.contents, child
+            yield self.contents
         if not self.self_closing and not self.wiki_markup and self.closing_tag:
-            for child in getter(self.closing_tag):
-                yield self.closing_tag, child
+            yield self.closing_tag
 
     def __strip__(self, normalize, collapse):
         if self.contents and is_visible(self.tag):
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -26,7 +26,7 @@ import re
 
 from . import HTMLEntity, Node, Text
 from .extras import Parameter
-from ..compat import str
+from ..compat import range, str
 from ..utils import parse_anything
 
 __all__ = ["Template"]
@@ -51,16 +51,12 @@ class Template(Node):
         else:
             return "{{" + str(self.name) + "}}"
 
-    def __iternodes__(self, getter):
-        yield None, self
-        for child in getter(self.name):
-            yield self.name, child
+    def __children__(self):
+        yield self.name
         for param in self.params:
             if param.showkey:
-                for child in getter(param.name):
-                    yield param.name, child
-            for child in getter(param.value):
-                yield param.value, child
+                yield param.name
+            yield param.value
 
     def __showtree__(self, write, get, mark):
         write("{{")
@@ -174,7 +170,7 @@ class Template(Node):
     def name(self, value):
         self._name = parse_anything(value)
 
-    def has(self, name, ignore_empty=True):
+    def has(self, name, ignore_empty=False):
        """Return ``True`` if any parameter in the template is named *name*.
 
         With *ignore_empty*, ``False`` will be returned even if the template
@@ -190,7 +186,7 @@ class Template(Node):
                 return True
         return False
 
-    has_param = lambda self, name, ignore_empty=True: \
+    has_param = lambda self, name, ignore_empty=False: \
                 self.has(name, ignore_empty)
     has_param.__doc__ = "Alias for :py:meth:`has`."
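
The effect of the new default, sketched with an invented template; an empty
|bar= field now counts as present unless the caller opts back in to the old
behavior:

    import mwparserfromhell

    template = mwparserfromhell.parse("{{foo|bar=}}").filter_templates()[0]

    assert template.has("bar")                          # new default
    assert not template.has("bar", ignore_empty=True)   # old behavior, opt-in
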
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -41,13 +41,10 @@ class Wikilink(Node):
             return "[[" + str(self.title) + "|" + str(self.text) + "]]"
         return "[[" + str(self.title) + "]]"
 
-    def __iternodes__(self, getter):
-        yield None, self
-        for child in getter(self.title):
-            yield self.title, child
+    def __children__(self):
+        yield self.title
         if self.text is not None:
-            for child in getter(self.text):
-                yield self.text, child
+            yield self.text
 
     def __strip__(self, normalize, collapse):
         if self.text is not None:
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -53,8 +53,12 @@ class Parser(object):
         self._tokenizer = Tokenizer()
         self._builder = Builder()
 
-    def parse(self, text, context=0):
-        """Parse *text*, returning a :py:class:`~.Wikicode` object tree."""
-        tokens = self._tokenizer.tokenize(text, context)
+    def parse(self, text, context=0, skip_style_tags=False):
+        """Parse *text*, returning a :py:class:`~.Wikicode` object tree.
+
+        If *skip_style_tags* is ``True``, then ``''`` and ``'''`` will not be
+        parsed, but instead be treated as plain text.
+        """
+        tokens = self._tokenizer.tokenize(text, context, skip_style_tags)
         code = self._builder.build(tokens)
         return code
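
A sketch of the new argument in use; the wikitext sample is invented:

    from mwparserfromhell.parser import Parser

    text = "''italic'' and '''bold'''"
    parsed = Parser().parse(text, skip_style_tags=True)

    # The quote markup is left as plain text instead of becoming <i>/<b>
    # Tag nodes, so no tags appear in the result:
    assert not parsed.filter_tags()
    assert str(parsed) == text
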
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -55,7 +55,6 @@ Local (stack-specific) contexts:
     * :py:const:`EXT_LINK_URI`
     * :py:const:`EXT_LINK_TITLE`
-    * :py:const:`EXT_LINK_BRACKETS`
 
 * :py:const:`HEADING`
@@ -100,7 +99,8 @@ Aggregate contexts:
 
 * :py:const:`FAIL`
 * :py:const:`UNSAFE`
 * :py:const:`DOUBLE`
-* :py:const:`INVALID_LINK`
+* :py:const:`NO_WIKILINKS`
+* :py:const:`NO_EXT_LINKS`
 
 """
@@ -121,38 +121,37 @@ WIKILINK = WIKILINK_TITLE + WIKILINK_TEXT
 
 EXT_LINK_URI = 1 << 7
 EXT_LINK_TITLE = 1 << 8
-EXT_LINK_BRACKETS = 1 << 9
-EXT_LINK = EXT_LINK_URI + EXT_LINK_TITLE + EXT_LINK_BRACKETS
-
-HEADING_LEVEL_1 = 1 << 10
-HEADING_LEVEL_2 = 1 << 11
-HEADING_LEVEL_3 = 1 << 12
-HEADING_LEVEL_4 = 1 << 13
-HEADING_LEVEL_5 = 1 << 14
-HEADING_LEVEL_6 = 1 << 15
+EXT_LINK = EXT_LINK_URI + EXT_LINK_TITLE
+
+HEADING_LEVEL_1 = 1 << 9
+HEADING_LEVEL_2 = 1 << 10
+HEADING_LEVEL_3 = 1 << 11
+HEADING_LEVEL_4 = 1 << 12
+HEADING_LEVEL_5 = 1 << 13
+HEADING_LEVEL_6 = 1 << 14
 HEADING = (HEADING_LEVEL_1 + HEADING_LEVEL_2 + HEADING_LEVEL_3 +
            HEADING_LEVEL_4 + HEADING_LEVEL_5 + HEADING_LEVEL_6)
 
-TAG_OPEN = 1 << 16
-TAG_ATTR = 1 << 17
-TAG_BODY = 1 << 18
-TAG_CLOSE = 1 << 19
+TAG_OPEN = 1 << 15
+TAG_ATTR = 1 << 16
+TAG_BODY = 1 << 17
+TAG_CLOSE = 1 << 18
 TAG = TAG_OPEN + TAG_ATTR + TAG_BODY + TAG_CLOSE
 
-STYLE_ITALICS = 1 << 20
-STYLE_BOLD = 1 << 21
-STYLE_PASS_AGAIN = 1 << 22
-STYLE_SECOND_PASS = 1 << 23
+STYLE_ITALICS = 1 << 19
+STYLE_BOLD = 1 << 20
+STYLE_PASS_AGAIN = 1 << 21
+STYLE_SECOND_PASS = 1 << 22
 STYLE = STYLE_ITALICS + STYLE_BOLD + STYLE_PASS_AGAIN + STYLE_SECOND_PASS
 
-DL_TERM = 1 << 24
+DL_TERM = 1 << 23
 
-HAS_TEXT = 1 << 25
-FAIL_ON_TEXT = 1 << 26
-FAIL_NEXT = 1 << 27
-FAIL_ON_LBRACE = 1 << 28
-FAIL_ON_RBRACE = 1 << 29
-FAIL_ON_EQUALS = 1 << 30
+HAS_TEXT = 1 << 24
+FAIL_ON_TEXT = 1 << 25
+FAIL_NEXT = 1 << 26
+FAIL_ON_LBRACE = 1 << 27
+FAIL_ON_RBRACE = 1 << 28
+FAIL_ON_EQUALS = 1 << 29
 SAFETY_CHECK = (HAS_TEXT + FAIL_ON_TEXT + FAIL_NEXT + FAIL_ON_LBRACE +
                 FAIL_ON_RBRACE + FAIL_ON_EQUALS)
@@ -163,7 +162,8 @@ GL_HEADING = 1 << 0
 # Aggregate contexts:
 
 FAIL = TEMPLATE + ARGUMENT + WIKILINK + EXT_LINK_TITLE + HEADING + TAG + STYLE
-UNSAFE = (TEMPLATE_NAME + WIKILINK + EXT_LINK_TITLE + TEMPLATE_PARAM_KEY +
-          ARGUMENT_NAME + TAG_CLOSE)
+UNSAFE = (TEMPLATE_NAME + WIKILINK_TITLE + EXT_LINK_TITLE +
+          TEMPLATE_PARAM_KEY + ARGUMENT_NAME + TAG_CLOSE)
 DOUBLE = TEMPLATE_PARAM_KEY + TAG_CLOSE
-INVALID_LINK = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK + EXT_LINK
+NO_WIKILINKS = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK_TITLE + EXT_LINK_URI
+NO_EXT_LINKS = TEMPLATE_NAME + ARGUMENT_NAME + WIKILINK_TITLE + EXT_LINK
@@ -1,6 +1,6 @@
 /*
 Tokenizer for MWParserFromHell
-Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
 
 Permission is hereby granted, free of charge, to any person obtaining a copy of
 this software and associated documentation files (the "Software"), to deal in
@@ -31,7 +31,7 @@ static int is_marker(Py_UNICODE this)
     int i;
 
     for (i = 0; i < NUM_MARKERS; i++) {
-        if (*MARKERS[i] == this)
+        if (MARKERS[i] == this)
             return 1;
     }
     return 0;
@@ -440,7 +440,7 @@ static int
 Tokenizer_emit_textbuffer(Tokenizer* self, Textbuffer* buffer, int reverse)
 {
     Textbuffer *original = buffer;
-    int i;
+    long i;
 
     if (reverse) {
         do {
@@ -642,7 +642,7 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self)
     PyObject *tokenlist;
 
     self->head += 2;
-    while (Tokenizer_READ(self, 0) == *"{" && braces < MAX_BRACES) {
+    while (Tokenizer_READ(self, 0) == '{' && braces < MAX_BRACES) {
         self->head++;
         braces++;
     }
@@ -674,8 +674,8 @@ static int Tokenizer_parse_template_or_argument(Tokenizer* self)
     if (BAD_ROUTE) {
         char text[MAX_BRACES + 1];
         RESET_ROUTE();
-        for (i = 0; i < braces; i++) text[i] = *"{";
-        text[braces] = *"";
+        for (i = 0; i < braces; i++) text[i] = '{';
+        text[braces] = '\0';
         if (Tokenizer_emit_text_then_stack(self, text)) {
             Py_XDECREF(text);
             return -1;
@@ -872,7 +872,7 @@ static int Tokenizer_parse_bracketed_uri_scheme(Tokenizer* self)
     if (Tokenizer_push(self, LC_EXT_LINK_URI))
         return -1;
 
-    if (Tokenizer_READ(self, 0) == *"/" && Tokenizer_READ(self, 1) == *"/") {
+    if (Tokenizer_READ(self, 0) == '/' && Tokenizer_READ(self, 1) == '/') {
         if (Tokenizer_emit_text(self, "//"))
             return -1;
         self->head += 2;
@@ -881,7 +881,7 @@ static int Tokenizer_parse_bracketed_uri_scheme(Tokenizer* self)
     buffer = Textbuffer_new();
     if (!buffer)
         return -1;
-    while ((this = Tokenizer_READ(self, 0)) != *"") {
+    while ((this = Tokenizer_READ(self, 0))) {
         i = 0;
         while (1) {
             if (!valid[i])
@@ -898,18 +898,18 @@ static int Tokenizer_parse_bracketed_uri_scheme(Tokenizer* self)
         self->head++;
     }
     end_of_loop:
-    if (this != *":") {
+    if (this != ':') {
         Textbuffer_dealloc(buffer);
         Tokenizer_fail_route(self);
         return 0;
     }
-    if (Tokenizer_emit_char(self, *":")) {
+    if (Tokenizer_emit_char(self, ':')) {
         Textbuffer_dealloc(buffer);
         return -1;
     }
     self->head++;
-    slashes = (Tokenizer_READ(self, 0) == *"/" &&
-               Tokenizer_READ(self, 1) == *"/");
+    slashes = (Tokenizer_READ(self, 0) == '/' &&
+               Tokenizer_READ(self, 1) == '/');
     if (slashes) {
         if (Tokenizer_emit_text(self, "//")) {
             Textbuffer_dealloc(buffer);
@@ -940,7 +940,8 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self)
     Textbuffer *scheme_buffer = Textbuffer_new(), *temp_buffer;
     PyObject *scheme;
     Py_UNICODE chunk;
-    int slashes, i, j;
+    long i;
+    int slashes, j;
 
     if (!scheme_buffer)
         return -1;
@@ -973,8 +974,8 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self)
         Textbuffer_dealloc(scheme_buffer);
         return -1;
     }
-    slashes = (Tokenizer_READ(self, 0) == *"/" &&
-               Tokenizer_READ(self, 1) == *"/");
+    slashes = (Tokenizer_READ(self, 0) == '/' &&
+               Tokenizer_READ(self, 1) == '/');
     if (!IS_SCHEME(scheme, slashes, 1)) {
         Py_DECREF(scheme);
         Textbuffer_dealloc(scheme_buffer);
@@ -988,7 +989,7 @@ static int Tokenizer_parse_free_uri_scheme(Tokenizer* self)
     }
     if (Tokenizer_emit_textbuffer(self, scheme_buffer, 1))
         return -1;
-    if (Tokenizer_emit_char(self, *":"))
+    if (Tokenizer_emit_char(self, ':'))
         return -1;
     if (slashes) {
         if (Tokenizer_emit_text(self, "//"))
@@ -1014,13 +1015,13 @@ Tokenizer_handle_free_link_text(Tokenizer* self, int* parens,
             return error; \
         }
 
-    if (this == *"(" && !(*parens)) {
+    if (this == '(' && !(*parens)) {
         *parens = 1;
         PUSH_TAIL_BUFFER(*tail, -1)
     }
-    else if (this == *"," || this == *";" || this == *"\\" || this == *"." ||
-             this == *":" || this == *"!" || this == *"?" ||
-             (!(*parens) && this == *")"))
+    else if (this == ',' || this == ';' || this == '\\' || this == '.' ||
+             this == ':' || this == '!' || this == '?' ||
+             (!(*parens) && this == ')'))
         return Textbuffer_write(tail, this);
     else
         PUSH_TAIL_BUFFER(*tail, -1)
@@ -1037,12 +1038,12 @@ Tokenizer_is_free_link(Tokenizer* self, Py_UNICODE this, Py_UNICODE next)
     Py_UNICODE after = Tokenizer_READ(self, 2);
     int ctx = self->topstack->context;
 
-    return (this == *"" || this == *"\n" || this == *"[" || this == *"]" ||
-            this == *"<" || this == *">" || (this == *"'" && next == *"'") ||
-            (this == *"|" && ctx & LC_TEMPLATE) ||
-            (this == *"=" && ctx & (LC_TEMPLATE_PARAM_KEY | LC_HEADING)) ||
-            (this == *"}" && next == *"}" &&
-            (ctx & LC_TEMPLATE || (after == *"}" && ctx & LC_ARGUMENT))));
+    return (!this || this == '\n' || this == '[' || this == ']' ||
+            this == '<' || this == '>' || (this == '\'' && next == '\'') ||
+            (this == '|' && ctx & LC_TEMPLATE) ||
+            (this == '=' && ctx & (LC_TEMPLATE_PARAM_KEY | LC_HEADING)) ||
+            (this == '}' && next == '}' &&
+            (ctx & LC_TEMPLATE || (after == '}' && ctx & LC_ARGUMENT))));
 }
 
 /*
@@ -1061,21 +1062,21 @@ Tokenizer_really_parse_external_link(Tokenizer* self, int brackets,
     if (BAD_ROUTE)
         return NULL;
     this = Tokenizer_READ(self, 0);
-    if (this == *"" || this == *"\n" || this == *" " || this == *"]")
+    if (!this || this == '\n' || this == ' ' || this == ']')
         return Tokenizer_fail_route(self);
-    if (!brackets && this == *"[")
+    if (!brackets && this == '[')
         return Tokenizer_fail_route(self);
     while (1) {
         this = Tokenizer_READ(self, 0);
         next = Tokenizer_READ(self, 1);
-        if (this == *"&") {
+        if (this == '&') {
            PUSH_TAIL_BUFFER(*extra, NULL)
            if (Tokenizer_parse_entity(self))
                return NULL;
        }
-        else if (this == *"<" && next == *"!"
-                 && Tokenizer_READ(self, 2) == *"-"
-                 && Tokenizer_READ(self, 3) == *"-") {
+        else if (this == '<' && next == '!'
+                 && Tokenizer_READ(self, 2) == '-'
+                 && Tokenizer_READ(self, 3) == '-') {
             PUSH_TAIL_BUFFER(*extra, NULL)
             if (Tokenizer_parse_comment(self))
                 return NULL;
@@ -1084,16 +1085,16 @@ Tokenizer_really_parse_external_link(Tokenizer* self, int brackets,
             self->head--;
             return Tokenizer_pop(self);
         }
-        else if (this == *"" || this == *"\n")
+        else if (!this || this == '\n')
             return Tokenizer_fail_route(self);
-        else if (this == *"{" && next == *"{" && Tokenizer_CAN_RECURSE(self)) {
+        else if (this == '{' && next == '{' && Tokenizer_CAN_RECURSE(self)) {
             PUSH_TAIL_BUFFER(*extra, NULL)
             if (Tokenizer_parse_template_or_argument(self))
                 return NULL;
         }
-        else if (this == *"]")
+        else if (this == ']')
             return Tokenizer_pop(self);
-        else if (this == *" ") {
+        else if (this == ' ') {
             if (brackets) {
                 if (Tokenizer_emit(self, ExternalLinkSeparator))
                     return NULL;
@@ -1102,7 +1103,7 @@ Tokenizer_really_parse_external_link(Tokenizer* self, int brackets,
                 self->head++;
                 return Tokenizer_parse(self, 0, 0);
             }
-            if (Textbuffer_write(extra, *" "))
+            if (Textbuffer_write(extra, ' '))
                 return NULL;
             return Tokenizer_pop(self);
         }
@@ -1157,7 +1158,7 @@ Tokenizer_remove_uri_scheme_from_textbuffer(Tokenizer* self, PyObject* link)
 */
 static int Tokenizer_parse_external_link(Tokenizer* self, int brackets)
 {
-    #define INVALID_CONTEXT self->topstack->context & AGG_INVALID_LINK
+    #define INVALID_CONTEXT self->topstack->context & AGG_NO_EXT_LINKS
     #define NOT_A_LINK \
         if (!brackets && self->topstack->context & LC_DLTERM) \
             return Tokenizer_handle_dl_term(self); \
@@ -1232,7 +1233,7 @@ static int Tokenizer_parse_heading(Tokenizer* self)
     self->global |= GL_HEADING;
     self->head += 1;
-    while (Tokenizer_READ(self, 0) == *"=") {
+    while (Tokenizer_READ(self, 0) == '=') {
         best++;
         self->head++;
     }
@@ -1242,7 +1243,7 @@ static int Tokenizer_parse_heading(Tokenizer* self)
         RESET_ROUTE();
         self->head = reset + best - 1;
         for (i = 0; i < best; i++) {
-            if (Tokenizer_emit_char(self, *"="))
+            if (Tokenizer_emit_char(self, '='))
                 return -1;
         }
         self->global ^= GL_HEADING;
@@ -1271,7 +1272,7 @@ static int Tokenizer_parse_heading(Tokenizer* self)
     if (heading->level < best) {
         diff = best - heading->level;
         for (i = 0; i < diff; i++) {
-            if (Tokenizer_emit_char(self, *"=")) {
+            if (Tokenizer_emit_char(self, '=')) {
                 Py_DECREF(heading->title);
                 free(heading);
                 return -1;
@@ -1296,14 +1297,14 @@ static int Tokenizer_parse_heading(Tokenizer* self)
 */
 static HeadingData* Tokenizer_handle_heading_end(Tokenizer* self)
 {
-    Py_ssize_t reset = self->head, best;
-    int i, current, level, diff;
+    Py_ssize_t reset = self->head;
+    int best, i, current, level, diff;
     HeadingData *after, *heading;
     PyObject *stack;
 
     self->head += 1;
     best = 1;
-    while (Tokenizer_READ(self, 0) == *"=") {
+    while (Tokenizer_READ(self, 0) == '=') {
         best++;
         self->head++;
     }
@@ -1316,7 +1317,7 @@ static HeadingData* Tokenizer_handle_heading_end(Tokenizer* self)
         if (level < best) {
             diff = best - level;
             for (i = 0; i < diff; i++) {
-                if (Tokenizer_emit_char(self, *"="))
+                if (Tokenizer_emit_char(self, '='))
                     return NULL;
             }
         }
@@ -1324,7 +1325,7 @@ static HeadingData* Tokenizer_handle_heading_end(Tokenizer* self)
     }
     else {
         for (i = 0; i < best; i++) {
-            if (Tokenizer_emit_char(self, *"=")) {
+            if (Tokenizer_emit_char(self, '=')) {
                 Py_DECREF(after->title);
                 free(after);
                 return NULL;
@@ -1372,21 +1373,21 @@ static int Tokenizer_really_parse_entity(Tokenizer* self)
         return -1;
     self->head++;
     this = Tokenizer_READ(self, 0);
-    if (this == *"") {
+    if (!this) {
         Tokenizer_fail_route(self);
         return 0;
     }
-    if (this == *"#") {
+    if (this == '#') {
         numeric = 1;
         if (Tokenizer_emit(self, HTMLEntityNumeric))
             return -1;
         self->head++;
         this = Tokenizer_READ(self, 0);
-        if (this == *"") {
+        if (!this) {
            Tokenizer_fail_route(self);
            return 0;
        }
-        if (this == *"x" || this == *"X") {
+        if (this == 'x' || this == 'X') {
             hexadecimal = 1;
             kwargs = PyDict_New();
             if (!kwargs)
@@ -1416,22 +1417,20 @@ static int Tokenizer_really_parse_entity(Tokenizer* self)
     zeroes = 0;
     while (1) {
         this = Tokenizer_READ(self, 0);
-        if (this == *";") {
+        if (this == ';') {
             if (i == 0)
                 FAIL_ROUTE_AND_EXIT()
             break;
         }
-        if (i == 0 && this == *"0") {
+        if (i == 0 && this == '0') {
             zeroes++;
             self->head++;
             continue;
         }
         if (i >= MAX_ENTITY_SIZE)
             FAIL_ROUTE_AND_EXIT()
-        for (j = 0; j < NUM_MARKERS; j++) {
-            if (this == *MARKERS[j])
-                FAIL_ROUTE_AND_EXIT()
-        }
+        if (is_marker(this))
+            FAIL_ROUTE_AND_EXIT()
         j = 0;
         while (1) {
             if (!valid[j])
@@ -1508,7 +1507,7 @@ static int Tokenizer_parse_entity(Tokenizer* self)
     if (BAD_ROUTE) {
         RESET_ROUTE();
         self->head = reset;
-        if (Tokenizer_emit_char(self, *"&"))
+        if (Tokenizer_emit_char(self, '&'))
            return -1;
        return 0;
    }
@@ -1537,14 +1536,14 @@ static int Tokenizer_parse_comment(Tokenizer* self)
         return -1;
     while (1) {
         this = Tokenizer_READ(self, 0);
-        if (this == *"") {
+        if (!this) {
             comment = Tokenizer_pop(self);
             Py_XDECREF(comment);
             self->head = reset;
             return Tokenizer_emit_text(self, "<!--");
         }
-        if (this == *"-" && Tokenizer_READ(self, 1) == this &&
-            Tokenizer_READ(self, 2) == *">") {
+        if (this == '-' && Tokenizer_READ(self, 1) == this &&
+            Tokenizer_READ(self, 2) == '>') {
             if (Tokenizer_emit_first(self, CommentStart))
                 return -1;
             if (Tokenizer_emit(self, CommentEnd))
@@ -1654,11 +1653,11 @@ static int Tokenizer_handle_tag_text(Tokenizer* self, Py_UNICODE text)
 
     if (!is_marker(text) || !Tokenizer_CAN_RECURSE(self))
         return Tokenizer_emit_char(self, text);
-    else if (text == next && next == *"{")
+    else if (text == next && next == '{')
         return Tokenizer_parse_template_or_argument(self);
-    else if (text == next && next == *"[")
+    else if (text == next && next == '[')
         return Tokenizer_parse_wikilink(self);
-    else if (text == *"<")
+    else if (text == '<')
         return Tokenizer_parse_tag(self);
     return Tokenizer_emit_char(self, text);
 }
@@ -1705,7 +1704,7 @@ Tokenizer_handle_tag_data(Tokenizer* self, TagData* data, Py_UNICODE chunk)
             return -1;
     }
     else if (data->context & TAG_ATTR_NAME) {
-        if (chunk == *"=") {
+        if (chunk == '=') {
             data->context = TAG_ATTR_VALUE | TAG_NOTE_QUOTE;
             if (Tokenizer_emit(self, TagAttrEquals))
                 return -1;
@@ -1720,11 +1719,11 @@ Tokenizer_handle_tag_data(Tokenizer* self, TagData* data, Py_UNICODE chunk)
         }
     }
     else if (data->context & TAG_ATTR_VALUE) {
-        escaped = (Tokenizer_READ_BACKWARDS(self, 1) == *"\\" &&
-                   Tokenizer_READ_BACKWARDS(self, 2) != *"\\");
+        escaped = (Tokenizer_READ_BACKWARDS(self, 1) == '\\' &&
+                   Tokenizer_READ_BACKWARDS(self, 2) != '\\');
         if (data->context & TAG_NOTE_QUOTE) {
             data->context ^= TAG_NOTE_QUOTE;
-            if (chunk == *"\"" && !escaped) {
+            if (chunk == '"' && !escaped) {
                 data->context |= TAG_QUOTED;
                 if (Tokenizer_push(self, self->topstack->context))
                     return -1;
@@ -1733,7 +1732,7 @@ Tokenizer_handle_tag_data(Tokenizer* self, TagData* data, Py_UNICODE chunk)
             }
         }
         else if (data->context & TAG_QUOTED) {
-            if (chunk == *"\"" && !escaped) {
+            if (chunk == '"' && !escaped) {
                 data->context |= TAG_NOTE_SPACE;
                 return 0;
             }
@@ -1844,15 +1843,15 @@ static PyObject* Tokenizer_handle_blacklisted_tag(Tokenizer* self)
     while (1) {
         this = Tokenizer_READ(self, 0);
         next = Tokenizer_READ(self, 1);
-        if (this == *"")
+        if (!this)
            return Tokenizer_fail_route(self);
-        else if (this == *"<" && next == *"/") {
+        else if (this == '<' && next == '/') {
             if (Tokenizer_handle_tag_open_close(self))
                 return NULL;
             self->head++;
             return Tokenizer_parse(self, 0, 0);
         }
-        else if (this == *"&") {
+        else if (this == '&') {
             if (Tokenizer_parse_entity(self))
                 return NULL;
         }
@@ -1957,7 +1956,7 @@ static PyObject* Tokenizer_really_parse_tag(Tokenizer* self)
         next = Tokenizer_READ(self, 1);
         can_exit = (!(data->context & (TAG_QUOTED | TAG_NAME)) ||
                     data->context & TAG_NOTE_SPACE);
-        if (this == *"") {
+        if (!this) {
             if (self->topstack->context & LC_TAG_ATTR) {
                 if (data->context & TAG_QUOTED) {
                     // Unclosed attribute quote: reset, don't die
@@ -1973,7 +1972,7 @@ static PyObject* Tokenizer_really_parse_tag(Tokenizer* self)
             TagData_dealloc(data);
             return Tokenizer_fail_route(self);
         }
-        else if (this == *">" && can_exit) {
+        else if (this == '>' && can_exit) {
             if (Tokenizer_handle_tag_close_open(self, data, TagCloseOpen)) {
                 TagData_dealloc(data);
                 return NULL;
@@ -1995,7 +1994,7 @@ static PyObject* Tokenizer_really_parse_tag(Tokenizer* self)
             Py_DECREF(text);
             return Tokenizer_handle_blacklisted_tag(self);
         }
-        else if (this == *"/" && next == *">" && can_exit) {
+        else if (this == '/' && next == '>' && can_exit) {
             if (Tokenizer_handle_tag_close_open(self, data,
                                                 TagCloseSelfclose)) {
                 TagData_dealloc(data);
@@ -2078,7 +2077,7 @@ static int Tokenizer_parse_tag(Tokenizer* self)
     if (BAD_ROUTE) {
         RESET_ROUTE();
         self->head = reset;
-        return Tokenizer_emit_char(self, *"<");
+        return Tokenizer_emit_char(self, '<');
     }
     if (!tag) {
         return -1;
@@ -2165,12 +2164,12 @@ static int Tokenizer_parse_bold(Tokenizer* self)
         RESET_ROUTE();
         self->head = reset;
         if (self->topstack->context & LC_STYLE_SECOND_PASS)
-            return Tokenizer_emit_char(self, *"'") ? -1 : 1;
+            return Tokenizer_emit_char(self, '\'') ? -1 : 1;
         if (self->topstack->context & LC_STYLE_ITALICS) {
             self->topstack->context |= LC_STYLE_PASS_AGAIN;
             return Tokenizer_emit_text(self, "'''");
         }
-        if (Tokenizer_emit_char(self, *"'"))
+        if (Tokenizer_emit_char(self, '\''))
            return -1;
        return Tokenizer_parse_italics(self);
    }
@@ -2256,19 +2255,19 @@ static PyObject* Tokenizer_parse_style(Tokenizer* self)
     int context = self->topstack->context, ticks = 2, i;
 
     self->head += 2;
-    while (Tokenizer_READ(self, 0) == *"'") {
+    while (Tokenizer_READ(self, 0) == '\'') {
         self->head++;
         ticks++;
     }
     if (ticks > 5) {
         for (i = 0; i < ticks - 5; i++) {
-            if (Tokenizer_emit_char(self, *"'"))
+            if (Tokenizer_emit_char(self, '\''))
                 return NULL;
         }
         ticks = 5;
     }
     else if (ticks == 4) {
-        if (Tokenizer_emit_char(self, *"'"))
+        if (Tokenizer_emit_char(self, '\''))
             return NULL;
         ticks = 3;
     }
@@ -2281,7 +2280,7 @@ static PyObject* Tokenizer_parse_style(Tokenizer* self)
     if (!Tokenizer_CAN_RECURSE(self)) {
         if (ticks == 3) {
             if (context & LC_STYLE_SECOND_PASS) {
-                if (Tokenizer_emit_char(self, *"'"))
+                if (Tokenizer_emit_char(self, '\''))
                     return NULL;
                 return Tokenizer_pop(self);
             }
@@ -2289,7 +2288,7 @@ static PyObject* Tokenizer_parse_style(Tokenizer* self)
             self->topstack->context |= LC_STYLE_PASS_AGAIN;
         }
         for (i = 0; i < ticks; i++) {
-            if (Tokenizer_emit_char(self, *"'"))
+            if (Tokenizer_emit_char(self, '\''))
                 return NULL;
         }
     }
@@ -2321,7 +2320,7 @@ static int Tokenizer_handle_list_marker(Tokenizer* self)
     PyObject *markup = Tokenizer_read(self, 0), *kwargs;
     Py_UNICODE code = *PyUnicode_AS_UNICODE(markup);
 
-    if (code == *";")
+    if (code == ';')
         self->topstack->context |= LC_DLTERM;
     kwargs = PyDict_New();
     if (!kwargs)
@@ -2345,8 +2344,8 @@ static int Tokenizer_handle_list(Tokenizer* self)
     if (Tokenizer_handle_list_marker(self))
         return -1;
-    while (marker == *"#" || marker == *"*" || marker == *";" ||
-           marker == *":") {
+    while (marker == '#' || marker == '*' || marker == ';' ||
+           marker == ':') {
         self->head++;
         if (Tokenizer_handle_list_marker(self))
             return -1;
@@ -2368,11 +2367,11 @@ static int Tokenizer_handle_hr(Tokenizer* self)
         return -1;
     self->head += 3;
     for (i = 0; i < 4; i++) {
-        if (Textbuffer_write(&buffer, *"-"))
+        if (Textbuffer_write(&buffer, '-'))
             return -1;
     }
-    while (Tokenizer_READ(self, 1) == *"-") {
-        if (Textbuffer_write(&buffer, *"-"))
+    while (Tokenizer_READ(self, 1) == '-') {
+        if (Textbuffer_write(&buffer, '-'))
            return -1;
        self->head++;
    }
@@ -2400,9 +2399,9 @@ static int Tokenizer_handle_hr(Tokenizer* self)
 static int Tokenizer_handle_dl_term(Tokenizer* self)
 {
     self->topstack->context ^= LC_DLTERM;
-    if (Tokenizer_READ(self, 0) == *":")
+    if (Tokenizer_READ(self, 0) == ':')
         return Tokenizer_handle_list_marker(self);
-    return Tokenizer_emit_char(self, *"\n");
+    return Tokenizer_emit_char(self, '\n');
 }
 
 /*
@@ -2441,28 +2440,26 @@ static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data)
 {
     if (context & LC_FAIL_NEXT)
         return -1;
-    if (context & LC_WIKILINK) {
-        if (context & LC_WIKILINK_TEXT)
-            return (data == *"[" && Tokenizer_READ(self, 1) == *"[") ? -1 : 0;
-        else if (data == *"]" || data == *"{")
+    if (context & LC_WIKILINK_TITLE) {
+        if (data == ']' || data == '{')
             self->topstack->context |= LC_FAIL_NEXT;
-        else if (data == *"\n" || data == *"[" || data == *"}")
+        else if (data == '\n' || data == '[' || data == '}')
             return -1;
         return 0;
     }
     if (context & LC_EXT_LINK_TITLE)
-        return (data == *"\n") ? -1 : 0;
+        return (data == '\n') ? -1 : 0;
     if (context & LC_TAG_CLOSE)
-        return (data == *"<") ? -1 : 0;
+        return (data == '<') ? -1 : 0;
     if (context & LC_TEMPLATE_NAME) {
-        if (data == *"{" || data == *"}" || data == *"[") {
+        if (data == '{' || data == '}' || data == '[') {
             self->topstack->context |= LC_FAIL_NEXT;
             return 0;
        }
-        if (data == *"]") {
+        if (data == ']') {
            return -1;
        }
-        if (data == *"|")
+        if (data == '|')
            return 0;
        if (context & LC_HAS_TEXT) {
            if (context & LC_FAIL_ON_TEXT) {
@@ -2470,7 +2467,7 @@ static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data)
                 return -1;
             }
             else {
-                if (data == *"\n")
+                if (data == '\n')
self->topstack->context |= LC_FAIL_ON_TEXT; | self->topstack->context |= LC_FAIL_ON_TEXT; | ||||
} | } | ||||
} | } | ||||
@@ -2479,13 +2476,13 @@ static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) | |||||
} | } | ||||
else { | else { | ||||
if (context & LC_FAIL_ON_EQUALS) { | if (context & LC_FAIL_ON_EQUALS) { | ||||
if (data == *"=") { | |||||
if (data == '=') { | |||||
return -1; | return -1; | ||||
} | } | ||||
} | } | ||||
else if (context & LC_FAIL_ON_LBRACE) { | else if (context & LC_FAIL_ON_LBRACE) { | ||||
if (data == *"{" || (Tokenizer_READ(self, -1) == *"{" && | |||||
Tokenizer_READ(self, -2) == *"{")) { | |||||
if (data == '{' || (Tokenizer_READ(self, -1) == '{' && | |||||
Tokenizer_READ(self, -2) == '{')) { | |||||
if (context & LC_TEMPLATE) | if (context & LC_TEMPLATE) | ||||
self->topstack->context |= LC_FAIL_ON_EQUALS; | self->topstack->context |= LC_FAIL_ON_EQUALS; | ||||
else | else | ||||
@@ -2495,7 +2492,7 @@ static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) | |||||
self->topstack->context ^= LC_FAIL_ON_LBRACE; | self->topstack->context ^= LC_FAIL_ON_LBRACE; | ||||
} | } | ||||
else if (context & LC_FAIL_ON_RBRACE) { | else if (context & LC_FAIL_ON_RBRACE) { | ||||
if (data == *"}") { | |||||
if (data == '}') { | |||||
if (context & LC_TEMPLATE) | if (context & LC_TEMPLATE) | ||||
self->topstack->context |= LC_FAIL_ON_EQUALS; | self->topstack->context |= LC_FAIL_ON_EQUALS; | ||||
else | else | ||||
@@ -2504,9 +2501,9 @@ static int Tokenizer_verify_safe(Tokenizer* self, int context, Py_UNICODE data) | |||||
} | } | ||||
self->topstack->context ^= LC_FAIL_ON_RBRACE; | self->topstack->context ^= LC_FAIL_ON_RBRACE; | ||||
} | } | ||||
else if (data == *"{") | |||||
else if (data == '{') | |||||
self->topstack->context |= LC_FAIL_ON_LBRACE; | self->topstack->context |= LC_FAIL_ON_LBRACE; | ||||
else if (data == *"}") | |||||
else if (data == '}') | |||||
self->topstack->context |= LC_FAIL_ON_RBRACE; | self->topstack->context |= LC_FAIL_ON_RBRACE; | ||||
} | } | ||||
return 0; | return 0; | ||||
@@ -2544,11 +2541,11 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push) | |||||
self->head++; | self->head++; | ||||
continue; | continue; | ||||
} | } | ||||
if (this == *"") | |||||
if (!this) | |||||
return Tokenizer_handle_end(self, this_context); | return Tokenizer_handle_end(self, this_context); | ||||
next = Tokenizer_READ(self, 1); | next = Tokenizer_READ(self, 1); | ||||
last = Tokenizer_READ_BACKWARDS(self, 1); | last = Tokenizer_READ_BACKWARDS(self, 1); | ||||
if (this == next && next == *"{") { | |||||
if (this == next && next == '{') { | |||||
if (Tokenizer_CAN_RECURSE(self)) { | if (Tokenizer_CAN_RECURSE(self)) { | ||||
if (Tokenizer_parse_template_or_argument(self)) | if (Tokenizer_parse_template_or_argument(self)) | ||||
return NULL; | return NULL; | ||||
@@ -2556,84 +2553,83 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push) | |||||
else if (Tokenizer_emit_char(self, this)) | else if (Tokenizer_emit_char(self, this)) | ||||
return NULL; | return NULL; | ||||
} | } | ||||
else if (this == *"|" && this_context & LC_TEMPLATE) { | |||||
else if (this == '|' && this_context & LC_TEMPLATE) { | |||||
if (Tokenizer_handle_template_param(self)) | if (Tokenizer_handle_template_param(self)) | ||||
return NULL; | return NULL; | ||||
} | } | ||||
else if (this == *"=" && this_context & LC_TEMPLATE_PARAM_KEY) { | |||||
else if (this == '=' && this_context & LC_TEMPLATE_PARAM_KEY) { | |||||
if (Tokenizer_handle_template_param_value(self)) | if (Tokenizer_handle_template_param_value(self)) | ||||
return NULL; | return NULL; | ||||
} | } | ||||
else if (this == next && next == *"}" && this_context & LC_TEMPLATE) | |||||
else if (this == next && next == '}' && this_context & LC_TEMPLATE) | |||||
return Tokenizer_handle_template_end(self); | return Tokenizer_handle_template_end(self); | ||||
else if (this == *"|" && this_context & LC_ARGUMENT_NAME) { | |||||
else if (this == '|' && this_context & LC_ARGUMENT_NAME) { | |||||
if (Tokenizer_handle_argument_separator(self)) | if (Tokenizer_handle_argument_separator(self)) | ||||
return NULL; | return NULL; | ||||
} | } | ||||
else if (this == next && next == *"}" && this_context & LC_ARGUMENT) { | |||||
if (Tokenizer_READ(self, 2) == *"}") { | |||||
else if (this == next && next == '}' && this_context & LC_ARGUMENT) { | |||||
if (Tokenizer_READ(self, 2) == '}') { | |||||
return Tokenizer_handle_argument_end(self); | return Tokenizer_handle_argument_end(self); | ||||
} | } | ||||
if (Tokenizer_emit_char(self, this)) | if (Tokenizer_emit_char(self, this)) | ||||
return NULL; | return NULL; | ||||
} | } | ||||
else if (this == next && next == *"[" && Tokenizer_CAN_RECURSE(self)) { | |||||
if (!(this_context & AGG_INVALID_LINK)) { | |||||
else if (this == next && next == '[' && Tokenizer_CAN_RECURSE(self)) { | |||||
if (!(this_context & AGG_NO_WIKILINKS)) { | |||||
if (Tokenizer_parse_wikilink(self)) | if (Tokenizer_parse_wikilink(self)) | ||||
return NULL; | return NULL; | ||||
} | } | ||||
else if (Tokenizer_emit_char(self, this)) | else if (Tokenizer_emit_char(self, this)) | ||||
return NULL; | return NULL; | ||||
} | } | ||||
else if (this == *"|" && this_context & LC_WIKILINK_TITLE) { | |||||
else if (this == '|' && this_context & LC_WIKILINK_TITLE) { | |||||
if (Tokenizer_handle_wikilink_separator(self)) | if (Tokenizer_handle_wikilink_separator(self)) | ||||
return NULL; | return NULL; | ||||
} | } | ||||
else if (this == next && next == *"]" && this_context & LC_WIKILINK) | |||||
else if (this == next && next == ']' && this_context & LC_WIKILINK) | |||||
return Tokenizer_handle_wikilink_end(self); | return Tokenizer_handle_wikilink_end(self); | ||||
else if (this == *"[") { | |||||
else if (this == '[') { | |||||
if (Tokenizer_parse_external_link(self, 1)) | if (Tokenizer_parse_external_link(self, 1)) | ||||
return NULL; | return NULL; | ||||
} | } | ||||
else if (this == *":" && !is_marker(last)) { | |||||
else if (this == ':' && !is_marker(last)) { | |||||
if (Tokenizer_parse_external_link(self, 0)) | if (Tokenizer_parse_external_link(self, 0)) | ||||
return NULL; | return NULL; | ||||
} | } | ||||
else if (this == *"]" && this_context & LC_EXT_LINK_TITLE) | |||||
else if (this == ']' && this_context & LC_EXT_LINK_TITLE) | |||||
return Tokenizer_pop(self); | return Tokenizer_pop(self); | ||||
else if (this == *"=" && !(self->global & GL_HEADING)) { | |||||
if (last == *"\n" || last == *"") { | |||||
else if (this == '=' && !(self->global & GL_HEADING)) { | |||||
if (!last || last == '\n') { | |||||
if (Tokenizer_parse_heading(self)) | if (Tokenizer_parse_heading(self)) | ||||
return NULL; | return NULL; | ||||
} | } | ||||
else if (Tokenizer_emit_char(self, this)) | else if (Tokenizer_emit_char(self, this)) | ||||
return NULL; | return NULL; | ||||
} | } | ||||
else if (this == *"=" && this_context & LC_HEADING) | |||||
else if (this == '=' && this_context & LC_HEADING) | |||||
return (PyObject*) Tokenizer_handle_heading_end(self); | return (PyObject*) Tokenizer_handle_heading_end(self); | ||||
else if (this == *"\n" && this_context & LC_HEADING) | |||||
else if (this == '\n' && this_context & LC_HEADING) | |||||
return Tokenizer_fail_route(self); | return Tokenizer_fail_route(self); | ||||
else if (this == *"&") { | |||||
else if (this == '&') { | |||||
if (Tokenizer_parse_entity(self)) | if (Tokenizer_parse_entity(self)) | ||||
return NULL; | return NULL; | ||||
} | } | ||||
else if (this == *"<" && next == *"!") { | |||||
else if (this == '<' && next == '!') { | |||||
next_next = Tokenizer_READ(self, 2); | next_next = Tokenizer_READ(self, 2); | ||||
if (next_next == Tokenizer_READ(self, 3) && next_next == *"-") { | |||||
if (next_next == Tokenizer_READ(self, 3) && next_next == '-') { | |||||
if (Tokenizer_parse_comment(self)) | if (Tokenizer_parse_comment(self)) | ||||
return NULL; | return NULL; | ||||
} | } | ||||
else if (Tokenizer_emit_char(self, this)) | else if (Tokenizer_emit_char(self, this)) | ||||
return NULL; | return NULL; | ||||
} | } | ||||
else if (this == *"<" && next == *"/" && | |||||
Tokenizer_READ(self, 2) != *"") { | |||||
else if (this == '<' && next == '/' && Tokenizer_READ(self, 2)) { | |||||
if (this_context & LC_TAG_BODY ? | if (this_context & LC_TAG_BODY ? | ||||
Tokenizer_handle_tag_open_close(self) : | Tokenizer_handle_tag_open_close(self) : | ||||
Tokenizer_handle_invalid_tag_start(self)) | Tokenizer_handle_invalid_tag_start(self)) | ||||
return NULL; | return NULL; | ||||
} | } | ||||
else if (this == *"<" && !(this_context & LC_TAG_CLOSE)) { | |||||
else if (this == '<' && !(this_context & LC_TAG_CLOSE)) { | |||||
if (Tokenizer_CAN_RECURSE(self)) { | if (Tokenizer_CAN_RECURSE(self)) { | ||||
if (Tokenizer_parse_tag(self)) | if (Tokenizer_parse_tag(self)) | ||||
return NULL; | return NULL; | ||||
@@ -2641,19 +2637,19 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push) | |||||
else if (Tokenizer_emit_char(self, this)) | else if (Tokenizer_emit_char(self, this)) | ||||
return NULL; | return NULL; | ||||
} | } | ||||
else if (this == *">" && this_context & LC_TAG_CLOSE) | |||||
else if (this == '>' && this_context & LC_TAG_CLOSE) | |||||
return Tokenizer_handle_tag_close_close(self); | return Tokenizer_handle_tag_close_close(self); | ||||
else if (this == next && next == *"'") { | |||||
else if (this == next && next == '\'' && !self->skip_style_tags) { | |||||
temp = Tokenizer_parse_style(self); | temp = Tokenizer_parse_style(self); | ||||
if (temp != Py_None) | if (temp != Py_None) | ||||
return temp; | return temp; | ||||
} | } | ||||
else if (last == *"\n" || last == *"") { | |||||
if (this == *"#" || this == *"*" || this == *";" || this == *":") { | |||||
else if (!last || last == '\n') { | |||||
if (this == '#' || this == '*' || this == ';' || this == ':') { | |||||
if (Tokenizer_handle_list(self)) | if (Tokenizer_handle_list(self)) | ||||
return NULL; | return NULL; | ||||
} | } | ||||
else if (this == *"-" && this == next && | |||||
else if (this == '-' && this == next && | |||||
this == Tokenizer_READ(self, 2) && | this == Tokenizer_READ(self, 2) && | ||||
this == Tokenizer_READ(self, 3)) { | this == Tokenizer_READ(self, 3)) { | ||||
if (Tokenizer_handle_hr(self)) | if (Tokenizer_handle_hr(self)) | ||||
@@ -2662,7 +2658,7 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push) | |||||
else if (Tokenizer_emit_char(self, this)) | else if (Tokenizer_emit_char(self, this)) | ||||
return NULL; | return NULL; | ||||
} | } | ||||
else if ((this == *"\n" || this == *":") && this_context & LC_DLTERM) { | |||||
else if ((this == '\n' || this == ':') && this_context & LC_DLTERM) { | |||||
if (Tokenizer_handle_dl_term(self)) | if (Tokenizer_handle_dl_term(self)) | ||||
return NULL; | return NULL; | ||||
} | } | ||||
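
The repeated (!last || last == '\n') guard above is what restricts headings, lists, and horizontal rules to the very start of the text or the start of a line. A minimal sketch of the visible effect through the public API (result comments are illustrative):

    import mwparserfromhell

    # '=' at the start of a line opens a heading...
    mwparserfromhell.parse("== Heading ==").filter_headings()  # -> one Heading node
    # ...but '=' mid-line is ordinary text
    mwparserfromhell.parse("a == b ==").filter_headings()      # -> []
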
@@ -2678,9 +2674,9 @@ static PyObject* Tokenizer_parse(Tokenizer* self, int context, int push) | |||||
static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args) | static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args) | ||||
{ | { | ||||
PyObject *text, *temp; | PyObject *text, *temp; | ||||
int context = 0; | |||||
int context = 0, skip_style_tags = 0; | |||||
if (PyArg_ParseTuple(args, "U|i", &text, &context)) { | |||||
if (PyArg_ParseTuple(args, "U|ii", &text, &context, &skip_style_tags)) { | |||||
Py_XDECREF(self->text); | Py_XDECREF(self->text); | ||||
self->text = PySequence_Fast(text, "expected a sequence"); | self->text = PySequence_Fast(text, "expected a sequence"); | ||||
} | } | ||||
@@ -2689,7 +2685,8 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args) | |||||
Py_ssize_t size; | Py_ssize_t size; | ||||
/* Failed to parse a Unicode object; try a string instead. */ | /* Failed to parse a Unicode object; try a string instead. */ | ||||
PyErr_Clear(); | PyErr_Clear(); | ||||
if (!PyArg_ParseTuple(args, "s#|i", &encoded, &size, &context)) | |||||
if (!PyArg_ParseTuple(args, "s#|ii", &encoded, &size, &context, | |||||
&skip_style_tags)) | |||||
return NULL; | return NULL; | ||||
temp = PyUnicode_FromStringAndSize(encoded, size); | temp = PyUnicode_FromStringAndSize(encoded, size); | ||||
if (!text) | if (!text) | ||||
@@ -2701,6 +2698,7 @@ static PyObject* Tokenizer_tokenize(Tokenizer* self, PyObject* args) | |||||
} | } | ||||
self->head = self->global = self->depth = self->cycles = 0; | self->head = self->global = self->depth = self->cycles = 0; | ||||
self->length = PyList_GET_SIZE(self->text); | self->length = PyList_GET_SIZE(self->text); | ||||
self->skip_style_tags = skip_style_tags; | |||||
return Tokenizer_parse(self, context, 1); | return Tokenizer_parse(self, context, 1); | ||||
} | } | ||||
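
Together with the new struct field, the extra "i" in the format strings threads skip_style_tags from Python down into the C tokenizer; per the changelog, it is exposed through parse(). A short sketch of the intended use:

    import mwparserfromhell

    text = "''italic'' and '''bold'''"
    mwparserfromhell.parse(text).filter_tags()                        # style tags are built
    mwparserfromhell.parse(text, skip_style_tags=True).filter_tags()  # [] -- '' and ''' stay literal text
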
@@ -1,6 +1,6 @@ | |||||
/* | /* | ||||
Tokenizer Header File for MWParserFromHell | Tokenizer Header File for MWParserFromHell | ||||
Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy of | Permission is hereby granted, free of charge, to any person obtaining a copy of | ||||
this software and associated documentation files (the "Software"), to deal in | this software and associated documentation files (the "Software"), to deal in | ||||
@@ -41,9 +41,9 @@ SOFTWARE. | |||||
#define HEXDIGITS "0123456789abcdefABCDEF" | #define HEXDIGITS "0123456789abcdefABCDEF" | ||||
#define ALPHANUM "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" | #define ALPHANUM "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" | ||||
static const char* MARKERS[] = { | |||||
"{", "}", "[", "]", "<", ">", "|", "=", "&", "'", "#", "*", ";", ":", "/", | |||||
"-", "\n", ""}; | |||||
static const char MARKERS[] = { | |||||
'{', '}', '[', ']', '<', '>', '|', '=', '&', '\'', '#', '*', ';', ':', '/', | |||||
'-', '\n', '\0'}; | |||||
#define NUM_MARKERS 18 | #define NUM_MARKERS 18 | ||||
#define TEXTBUFFER_BLOCKSIZE 1024 | #define TEXTBUFFER_BLOCKSIZE 1024 | ||||
@@ -121,40 +121,39 @@ static PyObject* TagCloseClose; | |||||
#define LC_WIKILINK_TITLE 0x00000020 | #define LC_WIKILINK_TITLE 0x00000020 | ||||
#define LC_WIKILINK_TEXT 0x00000040 | #define LC_WIKILINK_TEXT 0x00000040 | ||||
#define LC_EXT_LINK 0x00000380 | |||||
#define LC_EXT_LINK 0x00000180 | |||||
#define LC_EXT_LINK_URI 0x00000080 | #define LC_EXT_LINK_URI 0x00000080 | ||||
#define LC_EXT_LINK_TITLE 0x00000100 | #define LC_EXT_LINK_TITLE 0x00000100 | ||||
#define LC_EXT_LINK_BRACKETS 0x00000200 | |||||
#define LC_HEADING 0x0000FC00 | |||||
#define LC_HEADING_LEVEL_1 0x00000400 | |||||
#define LC_HEADING_LEVEL_2 0x00000800 | |||||
#define LC_HEADING_LEVEL_3 0x00001000 | |||||
#define LC_HEADING_LEVEL_4 0x00002000 | |||||
#define LC_HEADING_LEVEL_5 0x00004000 | |||||
#define LC_HEADING_LEVEL_6 0x00008000 | |||||
#define LC_TAG 0x000F0000 | |||||
#define LC_TAG_OPEN 0x00010000 | |||||
#define LC_TAG_ATTR 0x00020000 | |||||
#define LC_TAG_BODY 0x00040000 | |||||
#define LC_TAG_CLOSE 0x00080000 | |||||
#define LC_STYLE 0x00F00000 | |||||
#define LC_STYLE_ITALICS 0x00100000 | |||||
#define LC_STYLE_BOLD 0x00200000 | |||||
#define LC_STYLE_PASS_AGAIN 0x00400000 | |||||
#define LC_STYLE_SECOND_PASS 0x00800000 | |||||
#define LC_DLTERM 0x01000000 | |||||
#define LC_SAFETY_CHECK 0x7E000000 | |||||
#define LC_HAS_TEXT 0x02000000 | |||||
#define LC_FAIL_ON_TEXT 0x04000000 | |||||
#define LC_FAIL_NEXT 0x08000000 | |||||
#define LC_FAIL_ON_LBRACE 0x10000000 | |||||
#define LC_FAIL_ON_RBRACE 0x20000000 | |||||
#define LC_FAIL_ON_EQUALS 0x40000000 | |||||
#define LC_HEADING 0x00007E00 | |||||
#define LC_HEADING_LEVEL_1 0x00000200 | |||||
#define LC_HEADING_LEVEL_2 0x00000400 | |||||
#define LC_HEADING_LEVEL_3 0x00000800 | |||||
#define LC_HEADING_LEVEL_4 0x00001000 | |||||
#define LC_HEADING_LEVEL_5 0x00002000 | |||||
#define LC_HEADING_LEVEL_6 0x00004000 | |||||
#define LC_TAG 0x00078000 | |||||
#define LC_TAG_OPEN 0x00008000 | |||||
#define LC_TAG_ATTR 0x00010000 | |||||
#define LC_TAG_BODY 0x00020000 | |||||
#define LC_TAG_CLOSE 0x00040000 | |||||
#define LC_STYLE 0x00780000 | |||||
#define LC_STYLE_ITALICS 0x00080000 | |||||
#define LC_STYLE_BOLD 0x00100000 | |||||
#define LC_STYLE_PASS_AGAIN 0x00200000 | |||||
#define LC_STYLE_SECOND_PASS 0x00400000 | |||||
#define LC_DLTERM 0x00800000 | |||||
#define LC_SAFETY_CHECK 0x3F000000 | |||||
#define LC_HAS_TEXT 0x01000000 | |||||
#define LC_FAIL_ON_TEXT 0x02000000 | |||||
#define LC_FAIL_NEXT 0x04000000 | |||||
#define LC_FAIL_ON_LBRACE 0x08000000 | |||||
#define LC_FAIL_ON_RBRACE 0x10000000 | |||||
#define LC_FAIL_ON_EQUALS 0x20000000 | |||||
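
Dropping LC_EXT_LINK_BRACKETS frees a bit, so every constant above it shifts down one position; the composite masks remain the OR of their members. A quick sanity check with the new values:

    LC_EXT_LINK_URI   = 0x00000080
    LC_EXT_LINK_TITLE = 0x00000100
    assert LC_EXT_LINK_URI | LC_EXT_LINK_TITLE == 0x00000180  # LC_EXT_LINK

    heading_levels = [0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000]
    assert sum(heading_levels) == 0x00007E00                  # LC_HEADING
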
/* Global contexts: */ | /* Global contexts: */ | ||||
@@ -163,9 +162,10 @@ static PyObject* TagCloseClose; | |||||
/* Aggregate contexts: */ | /* Aggregate contexts: */ | ||||
#define AGG_FAIL (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_HEADING | LC_TAG | LC_STYLE) | #define AGG_FAIL (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_HEADING | LC_TAG | LC_STYLE) | ||||
#define AGG_UNSAFE (LC_TEMPLATE_NAME | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME) | |||||
#define AGG_UNSAFE (LC_TEMPLATE_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_TITLE | LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME) | |||||
#define AGG_DOUBLE (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE) | #define AGG_DOUBLE (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE) | ||||
#define AGG_INVALID_LINK (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK | LC_EXT_LINK) | |||||
#define AGG_NO_WIKILINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_URI) | |||||
#define AGG_NO_EXT_LINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK) | |||||
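
An aggregate context is simply the union of the single-bit contexts in which a construct is forbidden, so callers need only one bitwise AND (as in the AGG_NO_WIKILINKS test in Tokenizer_parse above). Sketched in Python, with the wikilink/ext-link values taken from this header and the template/argument values assumed from its unchanged portion:

    LC_TEMPLATE_NAME  = 0x00000001   # assumed
    LC_ARGUMENT_NAME  = 0x00000008   # assumed
    LC_WIKILINK_TITLE = 0x00000020
    LC_EXT_LINK_URI   = 0x00000080
    AGG_NO_WIKILINKS = (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME |
                        LC_WIKILINK_TITLE | LC_EXT_LINK_URI)

    context = LC_WIKILINK_TITLE
    if not context & AGG_NO_WIKILINKS:
        pass  # only then may "[[" start a nested wikilink
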
/* Tag contexts: */ | /* Tag contexts: */ | ||||
@@ -223,6 +223,7 @@ typedef struct { | |||||
int global; /* global context */ | int global; /* global context */ | ||||
int depth; /* stack recursion depth */ | int depth; /* stack recursion depth */ | ||||
int cycles; /* total number of stack recursions */ | int cycles; /* total number of stack recursions */ | ||||
int skip_style_tags; /* temporary fix for the sometimes broken tag parser */ | |||||
} Tokenizer; | } Tokenizer; | ||||
@@ -241,7 +242,7 @@ typedef struct { | |||||
/* Macros for accessing definitions: */ | /* Macros for accessing definitions: */ | ||||
#define GET_HTML_TAG(markup) (markup == *":" ? "dd" : markup == *";" ? "dt" : "li") | |||||
#define GET_HTML_TAG(markup) (markup == ':' ? "dd" : markup == ';' ? "dt" : "li") | |||||
#define IS_PARSABLE(tag) (call_def_func("is_parsable", tag, NULL, NULL)) | #define IS_PARSABLE(tag) (call_def_func("is_parsable", tag, NULL, NULL)) | ||||
#define IS_SINGLE(tag) (call_def_func("is_single", tag, NULL, NULL)) | #define IS_SINGLE(tag) (call_def_func("is_single", tag, NULL, NULL)) | ||||
#define IS_SINGLE_ONLY(tag) (call_def_func("is_single_only", tag, NULL, NULL)) | #define IS_SINGLE_ONLY(tag) (call_def_func("is_single_only", tag, NULL, NULL)) | ||||
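
GET_HTML_TAG turns a list-marker character into the HTML tag the node will carry; the same mapping, written out in Python:

    def get_html_tag(markup):
        # ';' introduces a definition term, ':' a definition; '#' and '*' are list items
        return {";": "dt", ":": "dd"}.get(markup, "li")
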
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -25,7 +25,7 @@ from math import log | |||||
import re | import re | ||||
from . import contexts, tokens | from . import contexts, tokens | ||||
from ..compat import htmlentities | |||||
from ..compat import htmlentities, range | |||||
from ..definitions import (get_html_tag, is_parsable, is_single, | from ..definitions import (get_html_tag, is_parsable, is_single, | ||||
is_single_only, is_scheme) | is_single_only, is_scheme) | ||||
@@ -467,7 +467,7 @@ class Tokenizer(object): | |||||
reset = self._head | reset = self._head | ||||
self._head += 1 | self._head += 1 | ||||
try: | try: | ||||
bad_context = self._context & contexts.INVALID_LINK | |||||
bad_context = self._context & contexts.NO_EXT_LINKS | |||||
if bad_context or not self._can_recurse(): | if bad_context or not self._can_recurse(): | ||||
raise BadRoute() | raise BadRoute() | ||||
link, extra, delta = self._really_parse_external_link(brackets) | link, extra, delta = self._really_parse_external_link(brackets) | ||||
@@ -620,7 +620,8 @@ class Tokenizer(object): | |||||
self._emit_first(tokens.TagAttrStart(pad_first=buf["first"], | self._emit_first(tokens.TagAttrStart(pad_first=buf["first"], | ||||
pad_before_eq=buf["before_eq"], pad_after_eq=buf["after_eq"])) | pad_before_eq=buf["before_eq"], pad_after_eq=buf["after_eq"])) | ||||
self._emit_all(self._pop()) | self._emit_all(self._pop()) | ||||
data.padding_buffer = {key: "" for key in data.padding_buffer} | |||||
for key in data.padding_buffer: | |||||
data.padding_buffer[key] = "" | |||||
def _handle_tag_space(self, data, text): | def _handle_tag_space(self, data, text): | ||||
"""Handle whitespace (*text*) inside of an HTML open tag.""" | """Handle whitespace (*text*) inside of an HTML open tag.""" | ||||
@@ -989,10 +990,8 @@ class Tokenizer(object): | |||||
context = self._context | context = self._context | ||||
if context & contexts.FAIL_NEXT: | if context & contexts.FAIL_NEXT: | ||||
return False | return False | ||||
if context & contexts.WIKILINK: | |||||
if context & contexts.WIKILINK_TEXT: | |||||
return not (this == self._read(1) == "[") | |||||
elif this == "]" or this == "{": | |||||
if context & contexts.WIKILINK_TITLE: | |||||
if this == "]" or this == "{": | |||||
self._context |= contexts.FAIL_NEXT | self._context |= contexts.FAIL_NEXT | ||||
elif this == "\n" or this == "[" or this == "}": | elif this == "\n" or this == "[" or this == "}": | ||||
return False | return False | ||||
@@ -1082,7 +1081,7 @@ class Tokenizer(object): | |||||
else: | else: | ||||
self._emit_text("}") | self._emit_text("}") | ||||
elif this == next == "[" and self._can_recurse(): | elif this == next == "[" and self._can_recurse(): | ||||
if not self._context & contexts.INVALID_LINK: | |||||
if not self._context & contexts.NO_WIKILINKS: | |||||
self._parse_wikilink() | self._parse_wikilink() | ||||
else: | else: | ||||
self._emit_text("[") | self._emit_text("[") | ||||
@@ -1124,7 +1123,7 @@ class Tokenizer(object): | |||||
self._emit_text("<") | self._emit_text("<") | ||||
elif this == ">" and self._context & contexts.TAG_CLOSE: | elif this == ">" and self._context & contexts.TAG_CLOSE: | ||||
return self._handle_tag_close_close() | return self._handle_tag_close_close() | ||||
elif this == next == "'": | |||||
elif this == next == "'" and not self._skip_style_tags: | |||||
result = self._parse_style() | result = self._parse_style() | ||||
if result is not None: | if result is not None: | ||||
return result | return result | ||||
@@ -1141,8 +1140,9 @@ class Tokenizer(object): | |||||
self._emit_text(this) | self._emit_text(this) | ||||
self._head += 1 | self._head += 1 | ||||
def tokenize(self, text, context=0): | |||||
def tokenize(self, text, context=0, skip_style_tags=False): | |||||
"""Build a list of tokens from a string of wikicode and return it.""" | """Build a list of tokens from a string of wikicode and return it.""" | ||||
self._skip_style_tags = skip_style_tags | |||||
split = self.regex.split(text) | split = self.regex.split(text) | ||||
self._text = [segment for segment in split if segment] | self._text = [segment for segment in split if segment] | ||||
self._head = self._global = self._depth = self._cycles = 0 | self._head = self._global = self._depth = self._cycles = 0 | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -34,15 +34,12 @@ from ..compat import py3k, str | |||||
__all__ = ["Token"] | __all__ = ["Token"] | ||||
class Token(object): | |||||
class Token(dict): | |||||
"""A token stores the semantic meaning of a unit of wikicode.""" | """A token stores the semantic meaning of a unit of wikicode.""" | ||||
def __init__(self, **kwargs): | |||||
super(Token, self).__setattr__("_kwargs", kwargs) | |||||
def __repr__(self): | def __repr__(self): | ||||
args = [] | args = [] | ||||
for key, value in self._kwargs.items(): | |||||
for key, value in self.items(): | |||||
if isinstance(value, str) and len(value) > 100: | if isinstance(value, str) and len(value) > 100: | ||||
args.append(key + "=" + repr(value[:97] + "...")) | args.append(key + "=" + repr(value[:97] + "...")) | ||||
else: | else: | ||||
@@ -50,18 +47,19 @@ class Token(object): | |||||
return "{0}({1})".format(type(self).__name__, ", ".join(args)) | return "{0}({1})".format(type(self).__name__, ", ".join(args)) | ||||
def __eq__(self, other): | def __eq__(self, other): | ||||
if isinstance(other, type(self)): | |||||
return self._kwargs == other._kwargs | |||||
return False | |||||
return isinstance(other, type(self)) and dict.__eq__(self, other) | |||||
def __ne__(self, other): | |||||
return not self.__eq__(other) | |||||
def __getattr__(self, key): | def __getattr__(self, key): | ||||
return self._kwargs.get(key) | |||||
return self.get(key) | |||||
def __setattr__(self, key, value): | def __setattr__(self, key, value): | ||||
self._kwargs[key] = value | |||||
self[key] = value | |||||
def __delattr__(self, key): | def __delattr__(self, key): | ||||
del self._kwargs[key] | |||||
del self[key] | |||||
def make(name): | def make(name): | ||||
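
Since Token now subclasses dict, its fields live directly in the mapping and attribute access is a thin shim over the dict protocol. A sketch of the resulting behavior (using the base class directly for illustration):

    t = Token(foo="bar")
    t.foo                          # "bar" -- __getattr__ delegates to get()
    t.missing                      # None  -- absent keys use get()'s default
    t.baz = 1                      # __setattr__ stores the key in the dict
    t == Token(foo="bar", baz=1)   # True: same type, equal dict contents
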
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -79,6 +79,11 @@ class SmartList(_SliceNormalizerMixIn, list): | |||||
[2, 3, 4] | [2, 3, 4] | ||||
>>> parent | >>> parent | ||||
[0, 1, 2, 3, 4] | [0, 1, 2, 3, 4] | ||||
The parent needs to keep a list of its children in order to update them, | |||||
which prevents them from being garbage-collected. If you are keeping the | |||||
parent around for a while but creating many children, it is advisable to | |||||
call :py:meth:`~._ListProxy.destroy` when you're finished with them. | |||||
""" | """ | ||||
def __init__(self, iterable=None): | def __init__(self, iterable=None): | ||||
@@ -146,6 +151,11 @@ class SmartList(_SliceNormalizerMixIn, list): | |||||
self.extend(other) | self.extend(other) | ||||
return self | return self | ||||
def _release_children(self): | |||||
copy = list(self) | |||||
for child in self._children: | |||||
child._parent = copy | |||||
@inheritdoc | @inheritdoc | ||||
def append(self, item): | def append(self, item): | ||||
head = len(self) | head = len(self) | ||||
@@ -174,17 +184,13 @@ class SmartList(_SliceNormalizerMixIn, list): | |||||
@inheritdoc | @inheritdoc | ||||
def reverse(self): | def reverse(self): | ||||
copy = list(self) | |||||
for child in self._children: | |||||
child._parent = copy | |||||
self._release_children() | |||||
super(SmartList, self).reverse() | super(SmartList, self).reverse() | ||||
if py3k: | if py3k: | ||||
@inheritdoc | @inheritdoc | ||||
def sort(self, key=None, reverse=None): | def sort(self, key=None, reverse=None): | ||||
copy = list(self) | |||||
for child in self._children: | |||||
child._parent = copy | |||||
self._release_children() | |||||
kwargs = {} | kwargs = {} | ||||
if key is not None: | if key is not None: | ||||
kwargs["key"] = key | kwargs["key"] = key | ||||
@@ -194,9 +200,7 @@ class SmartList(_SliceNormalizerMixIn, list): | |||||
else: | else: | ||||
@inheritdoc | @inheritdoc | ||||
def sort(self, cmp=None, key=None, reverse=None): | def sort(self, cmp=None, key=None, reverse=None): | ||||
copy = list(self) | |||||
for child in self._children: | |||||
child._parent = copy | |||||
self._release_children() | |||||
kwargs = {} | kwargs = {} | ||||
if cmp is not None: | if cmp is not None: | ||||
kwargs["cmp"] = cmp | kwargs["cmp"] = cmp | ||||
@@ -448,5 +452,9 @@ class _ListProxy(_SliceNormalizerMixIn, list): | |||||
item.sort(**kwargs) | item.sort(**kwargs) | ||||
self._parent[self._start:self._stop:self._step] = item | self._parent[self._start:self._stop:self._step] = item | ||||
def destroy(self): | |||||
"""Make the parent forget this child. The child will no longer work.""" | |||||
self._parent._children.pop(id(self)) | |||||
del inheritdoc | del inheritdoc | ||||
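
Because the parent records every child in order to keep slices synchronized, children of a long-lived SmartList are never garbage-collected on their own; destroy() is the manual release described in the new docstring:

    parent = SmartList([0, 1, 2, 3, 4])
    child = parent[2:]    # a _ListProxy, registered with the parent
    parent.append(5)
    child                 # [2, 3, 4, 5] -- kept in sync
    child.destroy()       # parent forgets the proxy; don't use child afterward
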
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -26,8 +26,9 @@ interface for the ``unicode`` type (``str`` on py3k) in a dynamic manner. | |||||
""" | """ | ||||
from __future__ import unicode_literals | from __future__ import unicode_literals | ||||
from sys import getdefaultencoding | |||||
from .compat import py3k, py32, str | |||||
from .compat import bytes, py26, py3k, str | |||||
__all__ = ["StringMixIn"] | __all__ = ["StringMixIn"] | ||||
@@ -55,10 +56,10 @@ class StringMixIn(object): | |||||
return self.__unicode__() | return self.__unicode__() | ||||
def __bytes__(self): | def __bytes__(self): | ||||
return self.__unicode__().encode("utf8") | |||||
return bytes(self.__unicode__(), getdefaultencoding()) | |||||
else: | else: | ||||
def __str__(self): | def __str__(self): | ||||
return self.__unicode__().encode("utf8") | |||||
return bytes(self.__unicode__()) | |||||
def __unicode__(self): | def __unicode__(self): | ||||
raise NotImplementedError() | raise NotImplementedError() | ||||
@@ -67,33 +68,21 @@ class StringMixIn(object): | |||||
return repr(self.__unicode__()) | return repr(self.__unicode__()) | ||||
def __lt__(self, other): | def __lt__(self, other): | ||||
if isinstance(other, StringMixIn): | |||||
return self.__unicode__() < other.__unicode__() | |||||
return self.__unicode__() < other | return self.__unicode__() < other | ||||
def __le__(self, other): | def __le__(self, other): | ||||
if isinstance(other, StringMixIn): | |||||
return self.__unicode__() <= other.__unicode__() | |||||
return self.__unicode__() <= other | return self.__unicode__() <= other | ||||
def __eq__(self, other): | def __eq__(self, other): | ||||
if isinstance(other, StringMixIn): | |||||
return self.__unicode__() == other.__unicode__() | |||||
return self.__unicode__() == other | return self.__unicode__() == other | ||||
def __ne__(self, other): | def __ne__(self, other): | ||||
if isinstance(other, StringMixIn): | |||||
return self.__unicode__() != other.__unicode__() | |||||
return self.__unicode__() != other | return self.__unicode__() != other | ||||
def __gt__(self, other): | def __gt__(self, other): | ||||
if isinstance(other, StringMixIn): | |||||
return self.__unicode__() > other.__unicode__() | |||||
return self.__unicode__() > other | return self.__unicode__() > other | ||||
def __ge__(self, other): | def __ge__(self, other): | ||||
if isinstance(other, StringMixIn): | |||||
return self.__unicode__() >= other.__unicode__() | |||||
return self.__unicode__() >= other | return self.__unicode__() >= other | ||||
if py3k: | if py3k: | ||||
@@ -117,250 +106,22 @@ class StringMixIn(object): | |||||
return reversed(self.__unicode__()) | return reversed(self.__unicode__()) | ||||
def __contains__(self, item): | def __contains__(self, item): | ||||
if isinstance(item, StringMixIn): | |||||
return str(item) in self.__unicode__() | |||||
return item in self.__unicode__() | |||||
return str(item) in self.__unicode__() | |||||
@inheritdoc | |||||
def capitalize(self): | |||||
return self.__unicode__().capitalize() | |||||
if py3k and not py32: | |||||
@inheritdoc | |||||
def casefold(self): | |||||
return self.__unicode__().casefold() | |||||
@inheritdoc | |||||
def center(self, width, fillchar=None): | |||||
if fillchar is None: | |||||
return self.__unicode__().center(width) | |||||
return self.__unicode__().center(width, fillchar) | |||||
@inheritdoc | |||||
def count(self, sub, start=None, end=None): | |||||
return self.__unicode__().count(sub, start, end) | |||||
if not py3k: | |||||
@inheritdoc | |||||
def decode(self, encoding=None, errors=None): | |||||
kwargs = {} | |||||
if encoding is not None: | |||||
kwargs["encoding"] = encoding | |||||
if errors is not None: | |||||
kwargs["errors"] = errors | |||||
return self.__unicode__().decode(**kwargs) | |||||
@inheritdoc | |||||
def encode(self, encoding=None, errors=None): | |||||
kwargs = {} | |||||
if encoding is not None: | |||||
kwargs["encoding"] = encoding | |||||
if errors is not None: | |||||
kwargs["errors"] = errors | |||||
return self.__unicode__().encode(**kwargs) | |||||
@inheritdoc | |||||
def endswith(self, prefix, start=None, end=None): | |||||
return self.__unicode__().endswith(prefix, start, end) | |||||
@inheritdoc | |||||
def expandtabs(self, tabsize=None): | |||||
if tabsize is None: | |||||
return self.__unicode__().expandtabs() | |||||
return self.__unicode__().expandtabs(tabsize) | |||||
@inheritdoc | |||||
def find(self, sub, start=None, end=None): | |||||
return self.__unicode__().find(sub, start, end) | |||||
@inheritdoc | |||||
def format(self, *args, **kwargs): | |||||
return self.__unicode__().format(*args, **kwargs) | |||||
if py3k: | |||||
@inheritdoc | |||||
def format_map(self, mapping): | |||||
return self.__unicode__().format_map(mapping) | |||||
@inheritdoc | |||||
def index(self, sub, start=None, end=None): | |||||
return self.__unicode__().index(sub, start, end) | |||||
@inheritdoc | |||||
def isalnum(self): | |||||
return self.__unicode__().isalnum() | |||||
@inheritdoc | |||||
def isalpha(self): | |||||
return self.__unicode__().isalpha() | |||||
@inheritdoc | |||||
def isdecimal(self): | |||||
return self.__unicode__().isdecimal() | |||||
@inheritdoc | |||||
def isdigit(self): | |||||
return self.__unicode__().isdigit() | |||||
if py3k: | |||||
@inheritdoc | |||||
def isidentifier(self): | |||||
return self.__unicode__().isidentifier() | |||||
@inheritdoc | |||||
def islower(self): | |||||
return self.__unicode__().islower() | |||||
@inheritdoc | |||||
def isnumeric(self): | |||||
return self.__unicode__().isnumeric() | |||||
if py3k: | |||||
@inheritdoc | |||||
def isprintable(self): | |||||
return self.__unicode__().isprintable() | |||||
@inheritdoc | |||||
def isspace(self): | |||||
return self.__unicode__().isspace() | |||||
@inheritdoc | |||||
def istitle(self): | |||||
return self.__unicode__().istitle() | |||||
@inheritdoc | |||||
def isupper(self): | |||||
return self.__unicode__().isupper() | |||||
@inheritdoc | |||||
def join(self, iterable): | |||||
return self.__unicode__().join(iterable) | |||||
@inheritdoc | |||||
def ljust(self, width, fillchar=None): | |||||
if fillchar is None: | |||||
return self.__unicode__().ljust(width) | |||||
return self.__unicode__().ljust(width, fillchar) | |||||
@inheritdoc | |||||
def lower(self): | |||||
return self.__unicode__().lower() | |||||
@inheritdoc | |||||
def lstrip(self, chars=None): | |||||
return self.__unicode__().lstrip(chars) | |||||
def __getattr__(self, attr): | |||||
return getattr(self.__unicode__(), attr) | |||||
if py3k: | if py3k: | ||||
@staticmethod | |||||
@inheritdoc | |||||
def maketrans(x, y=None, z=None): | |||||
if z is None: | |||||
if y is None: | |||||
return str.maketrans(x) | |||||
return str.maketrans(x, y) | |||||
return str.maketrans(x, y, z) | |||||
@inheritdoc | |||||
def partition(self, sep): | |||||
return self.__unicode__().partition(sep) | |||||
maketrans = str.maketrans # Static method can't rely on __getattr__ | |||||
@inheritdoc | |||||
def replace(self, old, new, count=None): | |||||
if count is None: | |||||
return self.__unicode__().replace(old, new) | |||||
return self.__unicode__().replace(old, new, count) | |||||
@inheritdoc | |||||
def rfind(self, sub, start=None, end=None): | |||||
return self.__unicode__().rfind(sub, start, end) | |||||
@inheritdoc | |||||
def rindex(self, sub, start=None, end=None): | |||||
return self.__unicode__().rindex(sub, start, end) | |||||
@inheritdoc | |||||
def rjust(self, width, fillchar=None): | |||||
if fillchar is None: | |||||
return self.__unicode__().rjust(width) | |||||
return self.__unicode__().rjust(width, fillchar) | |||||
@inheritdoc | |||||
def rpartition(self, sep): | |||||
return self.__unicode__().rpartition(sep) | |||||
if py3k and not py32: | |||||
@inheritdoc | |||||
def rsplit(self, sep=None, maxsplit=None): | |||||
kwargs = {} | |||||
if sep is not None: | |||||
kwargs["sep"] = sep | |||||
if maxsplit is not None: | |||||
kwargs["maxsplit"] = maxsplit | |||||
return self.__unicode__().rsplit(**kwargs) | |||||
else: | |||||
if py26: | |||||
@inheritdoc | @inheritdoc | ||||
def rsplit(self, sep=None, maxsplit=None): | |||||
if maxsplit is None: | |||||
if sep is None: | |||||
return self.__unicode__().rsplit() | |||||
return self.__unicode__().rsplit(sep) | |||||
return self.__unicode__().rsplit(sep, maxsplit) | |||||
@inheritdoc | |||||
def rstrip(self, chars=None): | |||||
return self.__unicode__().rstrip(chars) | |||||
if py3k and not py32: | |||||
@inheritdoc | |||||
def split(self, sep=None, maxsplit=None): | |||||
kwargs = {} | |||||
if sep is not None: | |||||
kwargs["sep"] = sep | |||||
if maxsplit is not None: | |||||
kwargs["maxsplit"] = maxsplit | |||||
return self.__unicode__().split(**kwargs) | |||||
else: | |||||
@inheritdoc | |||||
def split(self, sep=None, maxsplit=None): | |||||
if maxsplit is None: | |||||
if sep is None: | |||||
return self.__unicode__().split() | |||||
return self.__unicode__().split(sep) | |||||
return self.__unicode__().split(sep, maxsplit) | |||||
@inheritdoc | |||||
def splitlines(self, keepends=None): | |||||
if keepends is None: | |||||
return self.__unicode__().splitlines() | |||||
return self.__unicode__().splitlines(keepends) | |||||
@inheritdoc | |||||
def startswith(self, prefix, start=None, end=None): | |||||
return self.__unicode__().startswith(prefix, start, end) | |||||
@inheritdoc | |||||
def strip(self, chars=None): | |||||
return self.__unicode__().strip(chars) | |||||
@inheritdoc | |||||
def swapcase(self): | |||||
return self.__unicode__().swapcase() | |||||
@inheritdoc | |||||
def title(self): | |||||
return self.__unicode__().title() | |||||
@inheritdoc | |||||
def translate(self, table): | |||||
return self.__unicode__().translate(table) | |||||
@inheritdoc | |||||
def upper(self): | |||||
return self.__unicode__().upper() | |||||
@inheritdoc | |||||
def zfill(self, width): | |||||
return self.__unicode__().zfill(width) | |||||
def encode(self, encoding=None, errors=None): | |||||
if encoding is None: | |||||
encoding = getdefaultencoding() | |||||
if errors is not None: | |||||
return self.__unicode__().encode(encoding, errors) | |||||
return self.__unicode__().encode(encoding) | |||||
del inheritdoc | del inheritdoc | ||||
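
With the explicit wrappers gone, every str method not defined on the mix-in resolves through __getattr__ against the rendered value; only maketrans (a static method, so __getattr__ never fires) and encode (default-encoding handling) keep special cases. A minimal sketch with a hypothetical subclass:

    class Greeting(StringMixIn):
        def __unicode__(self):
            return "hello"

    g = Greeting()
    g.upper()            # "HELLO" -- looked up via __getattr__ -> str.upper
    g.startswith("he")   # True, same delegation
    g.encode()           # b"hello", using sys.getdefaultencoding()
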
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -21,8 +21,8 @@ | |||||
# SOFTWARE. | # SOFTWARE. | ||||
""" | """ | ||||
This module contains accessory functions that wrap around existing ones to | |||||
provide additional functionality. | |||||
This module contains accessory functions for other parts of the library. Parser | |||||
users generally won't need stuff from here. | |||||
""" | """ | ||||
from __future__ import unicode_literals | from __future__ import unicode_literals | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -21,9 +21,10 @@ | |||||
# SOFTWARE. | # SOFTWARE. | ||||
from __future__ import unicode_literals | from __future__ import unicode_literals | ||||
from itertools import chain | |||||
import re | import re | ||||
from .compat import maxsize, py3k, str | |||||
from .compat import py3k, range, str | |||||
from .nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity, | from .nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity, | ||||
Node, Tag, Template, Text, Wikilink) | Node, Tag, Template, Text, Wikilink) | ||||
from .string_mixin import StringMixIn | from .string_mixin import StringMixIn | ||||
@@ -51,96 +52,130 @@ class Wikicode(StringMixIn): | |||||
def __unicode__(self): | def __unicode__(self): | ||||
return "".join([str(node) for node in self.nodes]) | return "".join([str(node) for node in self.nodes]) | ||||
def _get_children(self, node): | |||||
"""Iterate over all descendants of a given *node*, including itself. | |||||
This is implemented by the ``__iternodes__()`` generator of ``Node`` | |||||
classes, which by default yields itself and nothing more. | |||||
""" | |||||
for context, child in node.__iternodes__(self._get_all_nodes): | |||||
yield child | |||||
def _get_all_nodes(self, code): | |||||
"""Iterate over all of our descendant nodes. | |||||
This is implemented by calling :py:meth:`_get_children` on every node | |||||
in our node list (:py:attr:`self.nodes <nodes>`). | |||||
@staticmethod | |||||
def _get_children(node, contexts=False, parent=None): | |||||
"""Iterate over all child :py:class:`.Node`\ s of a given *node*.""" | |||||
yield (parent, node) if contexts else node | |||||
for code in node.__children__(): | |||||
for child in code.nodes: | |||||
for result in Wikicode._get_children(child, contexts, code): | |||||
yield result | |||||
@staticmethod | |||||
def _slice_replace(code, index, old, new): | |||||
"""Replace the string *old* with *new* across *index* in *code*.""" | |||||
nodes = [str(node) for node in code.get(index)] | |||||
substring = "".join(nodes).replace(old, new) | |||||
code.nodes[index] = parse_anything(substring).nodes | |||||
@staticmethod | |||||
def _build_matcher(matches, flags): | |||||
"""Helper for :py:meth:`_indexed_ifilter` and others. | |||||
If *matches* is a function, return it. If it's a regex, return a | |||||
wrapper around it that can be called with a node to do a search. If | |||||
it's ``None``, return a function that always returns ``True``. | |||||
""" | """ | ||||
for node in code.nodes: | |||||
for child in self._get_children(node): | |||||
yield child | |||||
def _is_equivalent(self, obj, node): | |||||
"""Return ``True`` if *obj* and *node* are equivalent, else ``False``. | |||||
If *obj* is a ``Node``, the function will test whether they are the | |||||
same object, otherwise it will compare them with ``==``. | |||||
if matches: | |||||
if callable(matches): | |||||
return matches | |||||
return lambda obj: re.search(matches, str(obj), flags) | |||||
return lambda obj: True | |||||
def _indexed_ifilter(self, recursive=True, matches=None, flags=FLAGS, | |||||
forcetype=None): | |||||
"""Iterate over nodes and their corresponding indices in the node list. | |||||
The arguments are interpreted as for :py:meth:`ifilter`. For each tuple | |||||
``(i, node)`` yielded by this method, ``self.index(node) == i``. Note | |||||
that if *recursive* is ``True``, ``self.nodes[i]`` might not be the | |||||
node itself, but will still contain it. | |||||
""" | """ | ||||
return (node is obj) if isinstance(obj, Node) else (node == obj) | |||||
def _contains(self, nodes, obj): | |||||
"""Return ``True`` if *obj* is inside of *nodes*, else ``False``. | |||||
If *obj* is a ``Node``, we will only return ``True`` if *obj* is | |||||
actually in the list (and not just a node that equals it). Otherwise, | |||||
the test is simply ``obj in nodes``. | |||||
match = self._build_matcher(matches, flags) | |||||
if recursive: | |||||
def getter(i, node): | |||||
for ch in self._get_children(node): | |||||
yield (i, ch) | |||||
inodes = chain(*(getter(i, n) for i, n in enumerate(self.nodes))) | |||||
else: | |||||
inodes = enumerate(self.nodes) | |||||
for i, node in inodes: | |||||
if (not forcetype or isinstance(node, forcetype)) and match(node): | |||||
yield (i, node) | |||||
def _do_strong_search(self, obj, recursive=True): | |||||
"""Search for the specific element *obj* within the node list. | |||||
*obj* can be either a :py:class:`.Node` or a :py:class:`.Wikicode` | |||||
object. If found, we return a tuple (*context*, *index*) where | |||||
*context* is the :py:class:`.Wikicode` that contains *obj* and *index* | |||||
is its index there, as a :py:class:`slice`. Note that if *recursive* is | |||||
``False``, *context* will always be ``self`` (since we only look for | |||||
*obj* among immediate descendants), but if *recursive* is ``True``, | |||||
then it could be any :py:class:`.Wikicode` contained by a node within | |||||
``self``. If *obj* is not found, :py:exc:`ValueError` is raised. | |||||
""" | """ | ||||
mkslice = lambda i: slice(i, i + 1) | |||||
if isinstance(obj, Node): | if isinstance(obj, Node): | ||||
for node in nodes: | |||||
if node is obj: | |||||
return True | |||||
return False | |||||
return obj in nodes | |||||
def _do_search(self, obj, recursive, context=None, literal=None): | |||||
"""Return some info about the location of *obj* within *context*. | |||||
If *recursive* is ``True``, we'll look within *context* (``self`` by | |||||
default) and its descendants, otherwise just *context*. We raise | |||||
:py:exc:`ValueError` if *obj* isn't found. The return data is a list of | |||||
3-tuples (*type*, *context*, *data*) where *type* is *obj*\ 's best | |||||
type resolution (either ``Node``, ``Wikicode``, or ``str``), *context* | |||||
is the closest ``Wikicode`` encompassing it, and *data* is either a | |||||
``Node``, a list of ``Node``\ s, or ``None`` depending on *type*. | |||||
""" | |||||
if not context: | |||||
context = self | |||||
literal = isinstance(obj, (Node, Wikicode)) | |||||
obj = parse_anything(obj) | |||||
if not obj or obj not in self: | |||||
raise ValueError(obj) | |||||
if len(obj.nodes) == 1: | |||||
obj = obj.get(0) | |||||
if not recursive: | |||||
return self, mkslice(self.index(obj)) | |||||
for i, node in enumerate(self.nodes): | |||||
for context, child in self._get_children(node, contexts=True): | |||||
if obj is child: | |||||
if not context: | |||||
context = self | |||||
return context, mkslice(context.index(child)) | |||||
else: | |||||
context, ind = self._do_strong_search(obj.get(0), recursive) | |||||
for i in range(1, len(obj.nodes)): | |||||
if obj.get(i) is not context.get(ind.start + i): | |||||
break | |||||
else: | |||||
return context, slice(ind.start, ind.start + len(obj.nodes)) | |||||
raise ValueError(obj) | |||||
compare = lambda a, b: (a is b) if literal else (a == b) | |||||
results = [] | |||||
i = 0 | |||||
while i < len(context.nodes): | |||||
node = context.get(i) | |||||
if isinstance(obj, Node) and compare(obj, node): | |||||
results.append((Node, context, node)) | |||||
elif isinstance(obj, Wikicode) and compare(obj.get(0), node): | |||||
for j in range(1, len(obj.nodes)): | |||||
if not compare(obj.get(j), context.get(i + j)): | |||||
break | |||||
else: | |||||
nodes = list(context.nodes[i:i + len(obj.nodes)]) | |||||
results.append((Wikicode, context, nodes)) | |||||
i += len(obj.nodes) - 1 | |||||
elif recursive: | |||||
contexts = node.__iternodes__(self._get_all_nodes) | |||||
processed = [] | |||||
for code in (ctx for ctx, child in contexts): | |||||
if code and code not in processed and obj in code: | |||||
search = self._do_search(obj, recursive, code, literal) | |||||
results.extend(search) | |||||
processed.append(code) | |||||
i += 1 | |||||
if not results and not literal and recursive: | |||||
results.append((str, context, None)) | |||||
if not results and context is self: | |||||
def _do_weak_search(self, obj, recursive): | |||||
"""Search for an element that looks like *obj* within the node list. | |||||
This follows the same rules as :py:meth:`_do_strong_search` with some | |||||
differences. *obj* is treated as a string that might represent any | |||||
:py:class:`.Node`, :py:class:`.Wikicode`, or combination of the two | |||||
present in the node list. Thus, matching is weak (using string | |||||
comparisons) rather than strong (using ``is``). Because multiple nodes | |||||
can match *obj*, the result is a list of tuples instead of just one | |||||
(however, :py:exc:`ValueError` is still raised if nothing is found). | |||||
Individual matches will never overlap. | |||||
The tuples contain a new first element, *exact*, which is ``True`` if | |||||
we were able to match *obj* exactly to one or more adjacent nodes, or | |||||
``False`` if we found *obj* inside a node or incompletely spanning | |||||
multiple nodes. | |||||
""" | |||||
obj = parse_anything(obj) | |||||
if not obj or obj not in self: | |||||
raise ValueError(obj) | raise ValueError(obj) | ||||
results = [] | |||||
contexts = [self] | |||||
while contexts: | |||||
context = contexts.pop() | |||||
i = len(context.nodes) - 1 | |||||
while i >= 0: | |||||
node = context.get(i) | |||||
if obj.get(-1) == node: | |||||
for j in range(-len(obj.nodes), -1): | |||||
if obj.get(j) != context.get(i + j + 1): | |||||
break | |||||
else: | |||||
i -= len(obj.nodes) - 1 | |||||
index = slice(i, i + len(obj.nodes)) | |||||
results.append((True, context, index)) | |||||
elif recursive and obj in node: | |||||
contexts.extend(node.__children__()) | |||||
i -= 1 | |||||
if not results: | |||||
if not recursive: | |||||
raise ValueError(obj) | |||||
results.append((False, self, slice(0, len(self.nodes)))) | |||||
return results | return results | ||||
def _get_tree(self, code, lines, marker, indent): | def _get_tree(self, code, lines, marker, indent): | ||||
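
_build_matcher above is what lets the filter methods take a predicate as well as a regex, matching the changelog entry. Both forms, sketched:

    import mwparserfromhell

    code = mwparserfromhell.parse("{{foo}} {{foo|bar}}")
    code.filter_templates(matches=r"foo")                 # regex, as before
    code.filter_templates(matches=lambda n: n.has("1"))   # new: Node -> bool predicate
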
@@ -245,14 +280,14 @@ class Wikicode(StringMixIn): | |||||
return the index of our direct descendant node within *our* list of | return the index of our direct descendant node within *our* list of | ||||
nodes. Otherwise, the lookup is done only on direct descendants. | nodes. Otherwise, the lookup is done only on direct descendants. | ||||
""" | """ | ||||
if recursive: | |||||
for i, node in enumerate(self.nodes): | |||||
if self._contains(self._get_children(node), obj): | |||||
return i | |||||
raise ValueError(obj) | |||||
strict = isinstance(obj, Node) | |||||
equivalent = (lambda o, n: o is n) if strict else (lambda o, n: o == n) | |||||
for i, node in enumerate(self.nodes): | for i, node in enumerate(self.nodes): | ||||
if self._is_equivalent(obj, node): | |||||
if recursive: | |||||
for child in self._get_children(node): | |||||
if equivalent(obj, child): | |||||
return i | |||||
elif equivalent(obj, node): | |||||
return i | return i | ||||
raise ValueError(obj) | raise ValueError(obj) | ||||
@@ -268,66 +303,79 @@ class Wikicode(StringMixIn): | |||||
self.nodes.insert(index, node) | self.nodes.insert(index, node) | ||||
def insert_before(self, obj, value, recursive=True): | def insert_before(self, obj, value, recursive=True): | ||||
"""Insert *value* immediately before *obj* in the list of nodes. | |||||
"""Insert *value* immediately before *obj*. | |||||
*obj* can be either a string, a :py:class:`~.Node`, or other | |||||
*obj* can be either a string, a :py:class:`~.Node`, or another | |||||
:py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`, | :py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`, | ||||
for example). *value* can be anything parasable by | |||||
:py:func:`.parse_anything`. If *recursive* is ``True``, we will try to | |||||
find *obj* within our child nodes even if it is not a direct descendant | |||||
of this :py:class:`~.Wikicode` object. If *obj* is not found, | |||||
for example). If *obj* is a string, we will operate on all instances | |||||
of that string within the code, otherwise only on the specific instance | |||||
given. *value* can be anything parsable by :py:func:`.parse_anything`. | |||||
If *recursive* is ``True``, we will try to find *obj* within our child | |||||
nodes even if it is not a direct descendant of this | |||||
:py:class:`~.Wikicode` object. If *obj* is not found, | |||||
:py:exc:`ValueError` is raised. | :py:exc:`ValueError` is raised. | ||||
""" | """ | ||||
for restype, context, data in self._do_search(obj, recursive): | |||||
if restype in (Node, Wikicode): | |||||
i = context.index(data if restype is Node else data[0], False) | |||||
context.insert(i, value) | |||||
else: | |||||
obj = str(obj) | |||||
context.nodes = str(context).replace(obj, str(value) + obj) | |||||
if isinstance(obj, (Node, Wikicode)): | |||||
context, index = self._do_strong_search(obj, recursive) | |||||
context.insert(index.start, value) | |||||
else: | |||||
for exact, context, index in self._do_weak_search(obj, recursive): | |||||
if exact: | |||||
context.insert(index.start, value) | |||||
else: | |||||
obj = str(obj) | |||||
self._slice_replace(context, index, obj, str(value) + obj) | |||||
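A short sketch of the two behaviors::

    import mwparserfromhell

    code = mwparserfromhell.parse("{{foo}} and {{foo}}")
    code.insert_before("{{foo}}", "NEW")  # string: every occurrence
    print(code)                           # NEW{{foo}} and NEW{{foo}}

    code = mwparserfromhell.parse("{{foo}} and {{foo}}")
    first = code.filter_templates()[0]
    code.insert_before(first, "NEW")      # node: that instance only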
def insert_after(self, obj, value, recursive=True): | def insert_after(self, obj, value, recursive=True): | ||||
"""Insert *value* immediately after *obj* in the list of nodes. | |||||
"""Insert *value* immediately after *obj*. | |||||
*obj* can be either a string, a :py:class:`~.Node`, or other | |||||
*obj* can be either a string, a :py:class:`~.Node`, or another | |||||
:py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`, | :py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`, | ||||
for example). *value* can be anything parasable by | |||||
:py:func:`.parse_anything`. If *recursive* is ``True``, we will try to | |||||
find *obj* within our child nodes even if it is not a direct descendant | |||||
of this :py:class:`~.Wikicode` object. If *obj* is not found, | |||||
for example). If *obj* is a string, we will operate on all instances | |||||
of that string within the code, otherwise only on the specific instance | |||||
given. *value* can be anything parsable by :py:func:`.parse_anything`. | |||||
If *recursive* is ``True``, we will try to find *obj* within our child | |||||
nodes even if it is not a direct descendant of this | |||||
:py:class:`~.Wikicode` object. If *obj* is not found, | |||||
:py:exc:`ValueError` is raised. | :py:exc:`ValueError` is raised. | ||||
""" | """ | ||||
for restype, context, data in self._do_search(obj, recursive): | |||||
if restype in (Node, Wikicode): | |||||
i = context.index(data if restype is Node else data[-1], False) | |||||
context.insert(i + 1, value) | |||||
else: | |||||
obj = str(obj) | |||||
context.nodes = str(context).replace(obj, obj + str(value)) | |||||
if isinstance(obj, (Node, Wikicode)): | |||||
context, index = self._do_strong_search(obj, recursive) | |||||
context.insert(index.stop, value) | |||||
else: | |||||
for exact, context, index in self._do_weak_search(obj, recursive): | |||||
if exact: | |||||
context.insert(index.stop, value) | |||||
else: | |||||
obj = str(obj) | |||||
self._slice_replace(context, index, obj, obj + str(value)) | |||||
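And the mirror-image operation::

    import mwparserfromhell

    code = mwparserfromhell.parse("{{stub}}")
    code.insert_after("{{stub}}", " (tagged)")
    print(code)  # {{stub}} (tagged)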
def replace(self, obj, value, recursive=True): | def replace(self, obj, value, recursive=True): | ||||
"""Replace *obj* with *value* in the list of nodes. | |||||
"""Replace *obj* with *value*. | |||||
*obj* can be either a string, a :py:class:`~.Node`, or other | |||||
*obj* can be either a string, a :py:class:`~.Node`, or another | |||||
:py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`, | :py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`, | ||||
for example). *value* can be anything parasable by | |||||
:py:func:`.parse_anything`. If *recursive* is ``True``, we will try to | |||||
find *obj* within our child nodes even if it is not a direct descendant | |||||
of this :py:class:`~.Wikicode` object. If *obj* is not found, | |||||
for example). If *obj* is a string, we will operate on all instances | |||||
of that string within the code, otherwise only on the specific instance | |||||
given. *value* can be anything parsable by :py:func:`.parse_anything`. | |||||
If *recursive* is ``True``, we will try to find *obj* within our child | |||||
nodes even if it is not a direct descendant of this | |||||
:py:class:`~.Wikicode` object. If *obj* is not found, | |||||
:py:exc:`ValueError` is raised. | :py:exc:`ValueError` is raised. | ||||
""" | """ | ||||
for restype, context, data in self._do_search(obj, recursive): | |||||
if restype is Node: | |||||
i = context.index(data, False) | |||||
context.nodes.pop(i) | |||||
context.insert(i, value) | |||||
elif restype is Wikicode: | |||||
i = context.index(data[0], False) | |||||
for _ in data: | |||||
context.nodes.pop(i) | |||||
context.insert(i, value) | |||||
else: | |||||
context.nodes = str(context).replace(str(obj), str(value)) | |||||
if isinstance(obj, (Node, Wikicode)): | |||||
context, index = self._do_strong_search(obj, recursive) | |||||
for i in range(index.start, index.stop): | |||||
context.nodes.pop(index.start) | |||||
context.insert(index.start, value) | |||||
else: | |||||
for exact, context, index in self._do_weak_search(obj, recursive): | |||||
if exact: | |||||
for i in range(index.start, index.stop): | |||||
context.nodes.pop(index.start) | |||||
context.insert(index.start, value) | |||||
else: | |||||
self._slice_replace(context, index, str(obj), str(value)) | |||||
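For instance, replacing by string value touches every occurrence::

    import mwparserfromhell

    code = mwparserfromhell.parse("I have {{one}} and {{one}}")
    code.replace("{{one}}", "{{two}}")  # string: replaces both
    print(code)                         # I have {{two}} and {{two}}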
def append(self, value): | def append(self, value): | ||||
"""Insert *value* at the end of the list of nodes. | """Insert *value* at the end of the list of nodes. | ||||
@@ -341,55 +389,65 @@ class Wikicode(StringMixIn): | |||||
def remove(self, obj, recursive=True): | def remove(self, obj, recursive=True): | ||||
"""Remove *obj* from the list of nodes. | """Remove *obj* from the list of nodes. | ||||
*obj* can be either a string, a :py:class:`~.Node`, or other | |||||
*obj* can be either a string, a :py:class:`~.Node`, or another | |||||
:py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`, | :py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`, | ||||
for example). If *recursive* is ``True``, we will try to find *obj* | |||||
within our child nodes even if it is not a direct descendant of this | |||||
for example). If *obj* is a string, we will operate on all instances | |||||
of that string within the code, otherwise only on the specific instance | |||||
given. If *recursive* is ``True``, we will try to find *obj* within our | |||||
child nodes even if it is not a direct descendant of this | |||||
:py:class:`~.Wikicode` object. If *obj* is not found, | :py:class:`~.Wikicode` object. If *obj* is not found, | ||||
:py:exc:`ValueError` is raised. | :py:exc:`ValueError` is raised. | ||||
""" | """ | ||||
for restype, context, data in self._do_search(obj, recursive): | |||||
if restype is Node: | |||||
context.nodes.pop(context.index(data, False)) | |||||
elif restype is Wikicode: | |||||
i = context.index(data[0], False) | |||||
for _ in data: | |||||
context.nodes.pop(i) | |||||
else: | |||||
context.nodes = str(context).replace(str(obj), "") | |||||
if isinstance(obj, (Node, Wikicode)): | |||||
context, index = self._do_strong_search(obj, recursive) | |||||
for i in range(index.start, index.stop): | |||||
context.nodes.pop(index.start) | |||||
else: | |||||
for exact, context, index in self._do_weak_search(obj, recursive): | |||||
if exact: | |||||
for i in range(index.start, index.stop): | |||||
context.nodes.pop(index.start) | |||||
else: | |||||
self._slice_replace(context, index, str(obj), "") | |||||
def matches(self, other): | def matches(self, other): | ||||
"""Do a loose equivalency test suitable for comparing page names. | """Do a loose equivalency test suitable for comparing page names. | ||||
*other* can be any string-like object, including | *other* can be any string-like object, including | ||||
:py:class:`~.Wikicode`. This operation is symmetric; both sides are | |||||
adjusted. Specifically, whitespace and markup is stripped and the first | |||||
letter's case is normalized. Typical usage is | |||||
:py:class:`~.Wikicode`, or a tuple of these. This operation is | |||||
symmetric; both sides are adjusted. Specifically, whitespace and markup | |||||
is stripped and the first letter's case is normalized. Typical usage is | |||||
``if template.name.matches("stub"): ...``. | ``if template.name.matches("stub"): ...``. | ||||
""" | """ | ||||
cmp = lambda a, b: (a[0].upper() + a[1:] == b[0].upper() + b[1:] | |||||
if a and b else a == b) | |||||
this = self.strip_code().strip() | this = self.strip_code().strip() | ||||
if isinstance(other, (tuple, list)): | |||||
for obj in other: | |||||
that = parse_anything(obj).strip_code().strip() | |||||
if cmp(this, that): | |||||
return True | |||||
return False | |||||
that = parse_anything(other).strip_code().strip() | that = parse_anything(other).strip_code().strip() | ||||
if not this or not that: | |||||
return this == that | |||||
return this[0].upper() + this[1:] == that[0].upper() + that[1:] | |||||
return cmp(this, that) | |||||
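For example, with the new tuple form::

    import mwparserfromhell

    template = mwparserfromhell.parse("{{Stub}}").filter_templates()[0]
    template.name.matches("stub")           # True: first letter's case ignored
    template.name.matches(("foo", "stub"))  # True: any member may match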
def ifilter(self, recursive=True, matches=None, flags=FLAGS, | def ifilter(self, recursive=True, matches=None, flags=FLAGS, | ||||
forcetype=None): | forcetype=None): | ||||
"""Iterate over nodes in our list matching certain conditions. | """Iterate over nodes in our list matching certain conditions. | ||||
If *recursive* is ``True``, we will iterate over our children and all | If *recursive* is ``True``, we will iterate over our children and all | ||||
descendants of our children, otherwise just our immediate children. If | |||||
*matches* is given, we will only yield the nodes that match the given | |||||
regular expression (with :py:func:`re.search`). The default flags used | |||||
are :py:const:`re.IGNORECASE`, :py:const:`re.DOTALL`, and | |||||
:py:const:`re.UNICODE`, but custom flags can be specified by passing | |||||
*flags*. If *forcetype* is given, only nodes that are instances of this | |||||
type are yielded. | |||||
of their descendants, otherwise just our immediate children. If | |||||
*forcetype* is given, only nodes that are instances of this type are | |||||
yielded. *matches* can be used to further restrict the nodes, either as | |||||
a function (taking a single :py:class:`.Node` and returning a boolean) | |||||
or a regular expression (matched against the node's string | |||||
representation with :py:func:`re.search`). If *matches* is a regex, the | |||||
flags passed to :py:func:`re.search` are :py:const:`re.IGNORECASE`, | |||||
:py:const:`re.DOTALL`, and :py:const:`re.UNICODE`, but custom flags can | |||||
be specified by passing *flags*. | |||||
""" | """ | ||||
for node in (self._get_all_nodes(self) if recursive else self.nodes): | |||||
if not forcetype or isinstance(node, forcetype): | |||||
if not matches or re.search(matches, str(node), flags): | |||||
yield node | |||||
return (node for i, node in | |||||
self._indexed_ifilter(recursive, matches, flags, forcetype)) | |||||
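Both matcher styles, side by side (the function form is new in this release)::

    import mwparserfromhell

    code = mwparserfromhell.parse("{{foo}} [[link]] {{bar|x=1}}")
    code.filter_templates(matches=r"foo")                    # regex form
    code.filter_templates(matches=lambda n: bool(n.params))  # function form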
def filter(self, recursive=True, matches=None, flags=FLAGS, | def filter(self, recursive=True, matches=None, flags=FLAGS, | ||||
forcetype=None): | forcetype=None): | ||||
@@ -399,7 +457,7 @@ class Wikicode(StringMixIn): | |||||
""" | """ | ||||
return list(self.ifilter(recursive, matches, flags, forcetype)) | return list(self.ifilter(recursive, matches, flags, forcetype)) | ||||
def get_sections(self, levels=None, matches=None, flags=FLAGS, | |||||
def get_sections(self, levels=None, matches=None, flags=FLAGS, flat=False, | |||||
include_lead=None, include_headings=True): | include_lead=None, include_headings=True): | ||||
"""Return a list of sections within the page. | """Return a list of sections within the page. | ||||
@@ -407,13 +465,13 @@ class Wikicode(StringMixIn): | |||||
node list (implemented using :py:class:`~.SmartList`) so that changes | node list (implemented using :py:class:`~.SmartList`) so that changes | ||||
to sections are reflected in the parent Wikicode object. | to sections are reflected in the parent Wikicode object. | ||||
Each section contains all of its subsections. If *levels* is given, it | |||||
should be a iterable of integers; only sections whose heading levels | |||||
are within it will be returned. If *matches* is given, it should be a | |||||
regex to be matched against the titles of section headings; only | |||||
sections whose headings match the regex will be included. *flags* can | |||||
be used to override the default regex flags (see :py:meth:`ifilter`) if | |||||
*matches* is used. | |||||
Each section contains all of its subsections, unless *flat* is | |||||
``True``. If *levels* is given, it should be an iterable of integers; | |||||
only sections whose heading levels are within it will be returned. If | |||||
*matches* is given, it should be either a function or a regex; only | |||||
sections whose headings match it (without the surrounding equal signs) | |||||
will be included. *flags* can be used to override the default regex | |||||
flags (see :py:meth:`ifilter`) if a regex *matches* is used. | |||||
If *include_lead* is ``True``, the first, lead section (without a | If *include_lead* is ``True``, the first, lead section (without a | ||||
heading) will be included in the list; ``False`` will not include it; | heading) will be included in the list; ``False`` will not include it; | ||||
@@ -422,47 +480,58 @@ class Wikicode(StringMixIn): | |||||
:py:class:`~.Heading` object will be included; otherwise, this is | :py:class:`~.Heading` object will be included; otherwise, this is | ||||
skipped. | skipped. | ||||
""" | """ | ||||
if matches: | |||||
matches = r"^(=+?)\s*" + matches + r"\s*\1$" | |||||
headings = self.filter_headings() | |||||
filtered = self.filter_headings(matches=matches, flags=flags) | |||||
if levels: | |||||
filtered = [head for head in filtered if head.level in levels] | |||||
if matches or include_lead is False or (not include_lead and levels): | |||||
buffers = [] | |||||
else: | |||||
buffers = [(maxsize, 0)] | |||||
sections = [] | |||||
i = 0 | |||||
while i < len(self.nodes): | |||||
if self.nodes[i] in headings: | |||||
this = self.nodes[i].level | |||||
for (level, start) in buffers: | |||||
if this <= level: | |||||
sections.append(Wikicode(self.nodes[start:i])) | |||||
buffers = [buf for buf in buffers if buf[0] < this] | |||||
if self.nodes[i] in filtered: | |||||
if not include_headings: | |||||
i += 1 | |||||
if i >= len(self.nodes): | |||||
break | |||||
buffers.append((this, i)) | |||||
i += 1 | |||||
for (level, start) in buffers: | |||||
if start != i: | |||||
sections.append(Wikicode(self.nodes[start:i])) | |||||
return sections | |||||
title_matcher = self._build_matcher(matches, flags) | |||||
matcher = lambda heading: (title_matcher(heading.title) and | |||||
(not levels or heading.level in levels)) | |||||
iheadings = self._indexed_ifilter(recursive=False, forcetype=Heading) | |||||
sections = [] # Tuples of (index_of_first_node, section) | |||||
open_headings = [] # Tuples of (index, heading), where index and | |||||
# heading.level are both monotonically increasing | |||||
# Add the lead section if appropriate: | |||||
if include_lead or not (include_lead is not None or matches or levels): | |||||
itr = self._indexed_ifilter(recursive=False, forcetype=Heading) | |||||
try: | |||||
first = next(itr)[0] | |||||
sections.append((0, Wikicode(self.nodes[:first]))) | |||||
except StopIteration: # No headings in page | |||||
sections.append((0, Wikicode(self.nodes[:]))) | |||||
# Iterate over headings, adding sections to the list as they end: | |||||
for i, heading in iheadings: | |||||
if flat: # With flat, all sections close at the next heading | |||||
newly_closed, open_headings = open_headings, [] | |||||
else: # Otherwise, figure out which sections have closed, if any | |||||
closed_start_index = len(open_headings) | |||||
for j, (start, last_heading) in enumerate(open_headings): | |||||
if heading.level <= last_heading.level: | |||||
closed_start_index = j | |||||
break | |||||
newly_closed = open_headings[closed_start_index:] | |||||
del open_headings[closed_start_index:] | |||||
for start, closed_heading in newly_closed: | |||||
if matcher(closed_heading): | |||||
sections.append((start, Wikicode(self.nodes[start:i]))) | |||||
start = i if include_headings else (i + 1) | |||||
open_headings.append((start, heading)) | |||||
# Add any remaining open headings to the list of sections: | |||||
for start, heading in open_headings: | |||||
if matcher(heading): | |||||
sections.append((start, Wikicode(self.nodes[start:]))) | |||||
# Ensure that earlier sections are earlier in the returned list: | |||||
return [section for i, section in sorted(sections)] | |||||
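For example (a sketch; heading titles are matched without the surrounding ``==`` markers)::

    import mwparserfromhell

    text = "lead\n== A ==\naaa\n=== A1 ===\nsub\n== B ==\nbbb"
    code = mwparserfromhell.parse(text)
    code.get_sections(levels=[2])    # the two level-2 sections, "A" and "B"
    code.get_sections(matches="A1")  # only the "A1" subsection
    code.get_sections(flat=True)     # sections no longer contain subsections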
def strip_code(self, normalize=True, collapse=True): | def strip_code(self, normalize=True, collapse=True): | ||||
"""Return a rendered string without unprintable code such as templates. | """Return a rendered string without unprintable code such as templates. | ||||
The way a node is stripped is handled by the | The way a node is stripped is handled by the | ||||
:py:meth:`~.Node.__showtree__` method of :py:class:`~.Node` objects, | |||||
which generally return a subset of their nodes or ``None``. For | |||||
example, templates and tags are removed completely, links are stripped | |||||
to just their display part, headings are stripped to just their title. | |||||
If *normalize* is ``True``, various things may be done to strip code | |||||
:py:meth:`~.Node.__strip__` method of :py:class:`~.Node` objects, which | |||||
generally return a subset of their nodes or ``None``. For example, | |||||
templates and tags are removed completely, links are stripped to just | |||||
their display part, headings are stripped to just their title. If | |||||
*normalize* is ``True``, various things may be done to strip code | |||||
further, such as converting HTML entities like ``Σ``, ``Σ``, | further, such as converting HTML entities like ``Σ``, ``Σ``, | ||||
and ``Σ`` to ``Σ``. If *collapse* is ``True``, we will try to | and ``Σ`` to ``Σ``. If *collapse* is ``True``, we will try to | ||||
remove excess whitespace as well (three or more newlines are converted | remove excess whitespace as well (three or more newlines are converted | ||||
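A brief example of the effect::

    import mwparserfromhell

    code = mwparserfromhell.parse("== Title ==\nfoo {{tmpl}} &Sigma; bar")
    code.strip_code()  # heading markup and {{tmpl}} are dropped, and
                       # &Sigma; is normalized to the character Σ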
@@ -1,7 +1,7 @@ | |||||
#! /usr/bin/env python | #! /usr/bin/env python | ||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -21,10 +21,16 @@ | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||
# SOFTWARE. | # SOFTWARE. | ||||
import sys | |||||
if (sys.version_info[0] == 2 and sys.version_info[1] < 6) or \ | |||||
(sys.version_info[0] == 3 and sys.version_info[1] < 2): | |||||
raise Exception('mwparserfromhell needs Python 2.6+ or 3.2+') | |||||
from setuptools import setup, find_packages, Extension | from setuptools import setup, find_packages, Extension | ||||
from mwparserfromhell import __version__ | from mwparserfromhell import __version__ | ||||
from mwparserfromhell.compat import py3k | |||||
from mwparserfromhell.compat import py26, py3k | |||||
with open("README.rst") as fp: | with open("README.rst") as fp: | ||||
long_docs = fp.read() | long_docs = fp.read() | ||||
@@ -36,10 +42,11 @@ setup( | |||||
name = "mwparserfromhell", | name = "mwparserfromhell", | ||||
packages = find_packages(exclude=("tests",)), | packages = find_packages(exclude=("tests",)), | ||||
ext_modules = [tokenizer], | ext_modules = [tokenizer], | ||||
test_suite = "tests", | |||||
tests_require = ["unittest2"] if py26 else [], | |||||
test_suite = "tests.discover", | |||||
version = __version__, | version = __version__, | ||||
author = "Ben Kurtovic", | author = "Ben Kurtovic", | ||||
author_email = "ben.kurtovic@verizon.net", | |||||
author_email = "ben.kurtovic@gmail.com", | |||||
url = "https://github.com/earwig/mwparserfromhell", | url = "https://github.com/earwig/mwparserfromhell", | ||||
description = "MWParserFromHell is a parser for MediaWiki wikicode.", | description = "MWParserFromHell is a parser for MediaWiki wikicode.", | ||||
long_description = long_docs, | long_description = long_docs, | ||||
@@ -52,10 +59,12 @@ setup( | |||||
"Intended Audience :: Developers", | "Intended Audience :: Developers", | ||||
"License :: OSI Approved :: MIT License", | "License :: OSI Approved :: MIT License", | ||||
"Operating System :: OS Independent", | "Operating System :: OS Independent", | ||||
"Programming Language :: Python :: 2.6", | |||||
"Programming Language :: Python :: 2.7", | "Programming Language :: Python :: 2.7", | ||||
"Programming Language :: Python :: 3", | "Programming Language :: Python :: 3", | ||||
"Programming Language :: Python :: 3.2", | "Programming Language :: Python :: 3.2", | ||||
"Programming Language :: Python :: 3.3", | "Programming Language :: Python :: 3.3", | ||||
"Programming Language :: Python :: 3.4", | |||||
"Topic :: Text Processing :: Markup" | "Topic :: Text Processing :: Markup" | ||||
], | ], | ||||
) | ) |
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -21,8 +21,13 @@ | |||||
# SOFTWARE. | # SOFTWARE. | ||||
from __future__ import unicode_literals | from __future__ import unicode_literals | ||||
from unittest import TestCase | |||||
try: | |||||
from unittest2 import TestCase | |||||
except ImportError: | |||||
from unittest import TestCase | |||||
from mwparserfromhell.compat import range | |||||
from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, | from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity, | ||||
Tag, Template, Text, Wikilink) | Tag, Template, Text, Wikilink) | ||||
from mwparserfromhell.nodes.extras import Attribute, Parameter | from mwparserfromhell.nodes.extras import Attribute, Parameter | ||||
@@ -32,15 +37,6 @@ from mwparserfromhell.wikicode import Wikicode | |||||
wrap = lambda L: Wikicode(SmartList(L)) | wrap = lambda L: Wikicode(SmartList(L)) | ||||
wraptext = lambda *args: wrap([Text(t) for t in args]) | wraptext = lambda *args: wrap([Text(t) for t in args]) | ||||
def getnodes(code): | |||||
"""Iterate over all child nodes of a given parent node. | |||||
Imitates Wikicode._get_all_nodes(). | |||||
""" | |||||
for node in code.nodes: | |||||
for context, child in node.__iternodes__(getnodes): | |||||
yield child | |||||
class TreeEqualityTestCase(TestCase): | class TreeEqualityTestCase(TestCase): | ||||
"""A base test case with support for comparing the equality of node trees. | """A base test case with support for comparing the equality of node trees. | ||||
@@ -106,7 +102,7 @@ class TreeEqualityTestCase(TestCase): | |||||
self.assertEqual(exp_attr.pad_first, act_attr.pad_first) | self.assertEqual(exp_attr.pad_first, act_attr.pad_first) | ||||
self.assertEqual(exp_attr.pad_before_eq, act_attr.pad_before_eq) | self.assertEqual(exp_attr.pad_before_eq, act_attr.pad_before_eq) | ||||
self.assertEqual(exp_attr.pad_after_eq, act_attr.pad_after_eq) | self.assertEqual(exp_attr.pad_after_eq, act_attr.pad_after_eq) | ||||
self.assertIs(expected.wiki_markup, actual.wiki_markup) | |||||
self.assertEqual(expected.wiki_markup, actual.wiki_markup) | |||||
self.assertIs(expected.self_closing, actual.self_closing) | self.assertIs(expected.self_closing, actual.self_closing) | ||||
self.assertIs(expected.invalid, actual.invalid) | self.assertIs(expected.invalid, actual.invalid) | ||||
self.assertIs(expected.implicit, actual.implicit) | self.assertIs(expected.implicit, actual.implicit) | ||||
@@ -9,12 +9,10 @@ the main library. | |||||
from mwparserfromhell.compat import py3k | from mwparserfromhell.compat import py3k | ||||
if py3k: | if py3k: | ||||
range = range | |||||
from io import StringIO | from io import StringIO | ||||
from urllib.parse import urlencode | from urllib.parse import urlencode | ||||
from urllib.request import urlopen | from urllib.request import urlopen | ||||
else: | else: | ||||
range = xrange | |||||
from StringIO import StringIO | from StringIO import StringIO | ||||
from urllib import urlencode, urlopen | from urllib import urlencode, urlopen |
@@ -0,0 +1,24 @@ | |||||
# -*- coding: utf-8 -*- | |||||
""" | |||||
Discover tests using ``unittest2`` for Python 2.6. | |||||
It appears the default distutils test suite doesn't play nice with | |||||
``setUpClass``, thereby making some tests fail. Using ``unittest2`` to load | |||||
tests seems to work around that issue. | |||||
http://stackoverflow.com/a/17004409/753501 | |||||
""" | |||||
import os.path | |||||
from mwparserfromhell.compat import py26 | |||||
if py26: | |||||
import unittest2 as unittest | |||||
else: | |||||
import unittest | |||||
def additional_tests(): | |||||
project_root = os.path.split(os.path.dirname(__file__))[0] | |||||
return unittest.defaultTestLoader.discover(project_root) |
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -21,12 +21,16 @@ | |||||
# SOFTWARE. | # SOFTWARE. | ||||
from __future__ import unicode_literals | from __future__ import unicode_literals | ||||
import unittest | |||||
try: | |||||
import unittest2 as unittest | |||||
except ImportError: | |||||
import unittest | |||||
from mwparserfromhell.compat import str | from mwparserfromhell.compat import str | ||||
from mwparserfromhell.nodes import Argument, Text | from mwparserfromhell.nodes import Argument, Text | ||||
from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext | |||||
from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext | |||||
class TestArgument(TreeEqualityTestCase): | class TestArgument(TreeEqualityTestCase): | ||||
"""Test cases for the Argument node.""" | """Test cases for the Argument node.""" | ||||
@@ -38,20 +42,15 @@ class TestArgument(TreeEqualityTestCase): | |||||
node2 = Argument(wraptext("foo"), wraptext("bar")) | node2 = Argument(wraptext("foo"), wraptext("bar")) | ||||
self.assertEqual("{{{foo|bar}}}", str(node2)) | self.assertEqual("{{{foo|bar}}}", str(node2)) | ||||
def test_iternodes(self): | |||||
"""test Argument.__iternodes__()""" | |||||
node1n1 = Text("foobar") | |||||
node2n1, node2n2, node2n3 = Text("foo"), Text("bar"), Text("baz") | |||||
node1 = Argument(wrap([node1n1])) | |||||
node2 = Argument(wrap([node2n1]), wrap([node2n2, node2n3])) | |||||
gen1 = node1.__iternodes__(getnodes) | |||||
gen2 = node2.__iternodes__(getnodes) | |||||
self.assertEqual((None, node1), next(gen1)) | |||||
self.assertEqual((None, node2), next(gen2)) | |||||
self.assertEqual((node1.name, node1n1), next(gen1)) | |||||
self.assertEqual((node2.name, node2n1), next(gen2)) | |||||
self.assertEqual((node2.default, node2n2), next(gen2)) | |||||
self.assertEqual((node2.default, node2n3), next(gen2)) | |||||
def test_children(self): | |||||
"""test Argument.__children__()""" | |||||
node1 = Argument(wraptext("foobar")) | |||||
node2 = Argument(wraptext("foo"), wrap([Text("bar"), Text("baz")])) | |||||
gen1 = node1.__children__() | |||||
gen2 = node2.__children__() | |||||
self.assertIs(node1.name, next(gen1)) | |||||
self.assertIs(node2.name, next(gen2)) | |||||
self.assertIs(node2.default, next(gen2)) | |||||
self.assertRaises(StopIteration, next, gen1) | self.assertRaises(StopIteration, next, gen1) | ||||
self.assertRaises(StopIteration, next, gen2) | self.assertRaises(StopIteration, next, gen2) | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -21,7 +21,11 @@ | |||||
# SOFTWARE. | # SOFTWARE. | ||||
from __future__ import unicode_literals | from __future__ import unicode_literals | ||||
import unittest | |||||
try: | |||||
import unittest2 as unittest | |||||
except ImportError: | |||||
import unittest | |||||
from mwparserfromhell.compat import str | from mwparserfromhell.compat import str | ||||
from mwparserfromhell.nodes import Template | from mwparserfromhell.nodes import Template | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -21,7 +21,11 @@ | |||||
# SOFTWARE. | # SOFTWARE. | ||||
from __future__ import unicode_literals | from __future__ import unicode_literals | ||||
import unittest | |||||
try: | |||||
import unittest2 as unittest | |||||
except ImportError: | |||||
import unittest | |||||
from mwparserfromhell.nodes import (Argument, Comment, ExternalLink, Heading, | from mwparserfromhell.nodes import (Argument, Comment, ExternalLink, Heading, | ||||
HTMLEntity, Tag, Template, Text, Wikilink) | HTMLEntity, Tag, Template, Text, Wikilink) | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -21,7 +21,11 @@ | |||||
# SOFTWARE. | # SOFTWARE. | ||||
from __future__ import unicode_literals | from __future__ import unicode_literals | ||||
import unittest | |||||
try: | |||||
import unittest2 as unittest | |||||
except ImportError: | |||||
import unittest | |||||
from mwparserfromhell.compat import str | from mwparserfromhell.compat import str | ||||
from mwparserfromhell.nodes import Comment | from mwparserfromhell.nodes import Comment | ||||
@@ -36,11 +40,10 @@ class TestComment(TreeEqualityTestCase): | |||||
node = Comment("foobar") | node = Comment("foobar") | ||||
self.assertEqual("<!--foobar-->", str(node)) | self.assertEqual("<!--foobar-->", str(node)) | ||||
def test_iternodes(self): | |||||
"""test Comment.__iternodes__()""" | |||||
def test_children(self): | |||||
"""test Comment.__children__()""" | |||||
node = Comment("foobar") | node = Comment("foobar") | ||||
gen = node.__iternodes__(None) | |||||
self.assertEqual((None, node), next(gen)) | |||||
gen = node.__children__() | |||||
self.assertRaises(StopIteration, next, gen) | self.assertRaises(StopIteration, next, gen) | ||||
def test_strip(self): | def test_strip(self): | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -21,7 +21,11 @@ | |||||
# SOFTWARE. | # SOFTWARE. | ||||
from __future__ import unicode_literals | from __future__ import unicode_literals | ||||
import unittest | |||||
try: | |||||
import unittest2 as unittest | |||||
except ImportError: | |||||
import unittest | |||||
try: | try: | ||||
from mwparserfromhell.parser._tokenizer import CTokenizer | from mwparserfromhell.parser._tokenizer import CTokenizer | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -22,7 +22,11 @@ | |||||
from __future__ import print_function, unicode_literals | from __future__ import print_function, unicode_literals | ||||
import json | import json | ||||
import unittest | |||||
try: | |||||
import unittest2 as unittest | |||||
except ImportError: | |||||
import unittest | |||||
import mwparserfromhell | import mwparserfromhell | ||||
from mwparserfromhell.compat import py3k, str | from mwparserfromhell.compat import py3k, str | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -21,12 +21,16 @@ | |||||
# SOFTWARE. | # SOFTWARE. | ||||
from __future__ import unicode_literals | from __future__ import unicode_literals | ||||
import unittest | |||||
try: | |||||
import unittest2 as unittest | |||||
except ImportError: | |||||
import unittest | |||||
from mwparserfromhell.compat import str | from mwparserfromhell.compat import str | ||||
from mwparserfromhell.nodes import ExternalLink, Text | from mwparserfromhell.nodes import ExternalLink, Text | ||||
from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext | |||||
from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext | |||||
class TestExternalLink(TreeEqualityTestCase): | class TestExternalLink(TreeEqualityTestCase): | ||||
"""Test cases for the ExternalLink node.""" | """Test cases for the ExternalLink node.""" | ||||
@@ -43,21 +47,16 @@ class TestExternalLink(TreeEqualityTestCase): | |||||
wraptext("Example Web Page")) | wraptext("Example Web Page")) | ||||
self.assertEqual("[http://example.com/ Example Web Page]", str(node4)) | self.assertEqual("[http://example.com/ Example Web Page]", str(node4)) | ||||
def test_iternodes(self): | |||||
"""test ExternalLink.__iternodes__()""" | |||||
node1n1 = Text("http://example.com/") | |||||
node2n1 = Text("http://example.com/") | |||||
node2n2, node2n3 = Text("Example"), Text("Page") | |||||
node1 = ExternalLink(wrap([node1n1]), brackets=False) | |||||
node2 = ExternalLink(wrap([node2n1]), wrap([node2n2, node2n3])) | |||||
gen1 = node1.__iternodes__(getnodes) | |||||
gen2 = node2.__iternodes__(getnodes) | |||||
self.assertEqual((None, node1), next(gen1)) | |||||
self.assertEqual((None, node2), next(gen2)) | |||||
self.assertEqual((node1.url, node1n1), next(gen1)) | |||||
self.assertEqual((node2.url, node2n1), next(gen2)) | |||||
self.assertEqual((node2.title, node2n2), next(gen2)) | |||||
self.assertEqual((node2.title, node2n3), next(gen2)) | |||||
def test_children(self): | |||||
"""test ExternalLink.__children__()""" | |||||
node1 = ExternalLink(wraptext("http://example.com/"), brackets=False) | |||||
node2 = ExternalLink(wraptext("http://example.com/"), | |||||
wrap([Text("Example"), Text("Page")])) | |||||
gen1 = node1.__children__() | |||||
gen2 = node2.__children__() | |||||
self.assertEqual(node1.url, next(gen1)) | |||||
self.assertEqual(node2.url, next(gen2)) | |||||
self.assertEqual(node2.title, next(gen2)) | |||||
self.assertRaises(StopIteration, next, gen1) | self.assertRaises(StopIteration, next, gen1) | ||||
self.assertRaises(StopIteration, next, gen2) | self.assertRaises(StopIteration, next, gen2) | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -21,12 +21,16 @@ | |||||
# SOFTWARE. | # SOFTWARE. | ||||
from __future__ import unicode_literals | from __future__ import unicode_literals | ||||
import unittest | |||||
try: | |||||
import unittest2 as unittest | |||||
except ImportError: | |||||
import unittest | |||||
from mwparserfromhell.compat import str | from mwparserfromhell.compat import str | ||||
from mwparserfromhell.nodes import Heading, Text | from mwparserfromhell.nodes import Heading, Text | ||||
from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext | |||||
from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext | |||||
class TestHeading(TreeEqualityTestCase): | class TestHeading(TreeEqualityTestCase): | ||||
"""Test cases for the Heading node.""" | """Test cases for the Heading node.""" | ||||
@@ -38,14 +42,11 @@ class TestHeading(TreeEqualityTestCase): | |||||
node2 = Heading(wraptext(" zzz "), 5) | node2 = Heading(wraptext(" zzz "), 5) | ||||
self.assertEqual("===== zzz =====", str(node2)) | self.assertEqual("===== zzz =====", str(node2)) | ||||
def test_iternodes(self): | |||||
"""test Heading.__iternodes__()""" | |||||
text1, text2 = Text("foo"), Text("bar") | |||||
node = Heading(wrap([text1, text2]), 3) | |||||
gen = node.__iternodes__(getnodes) | |||||
self.assertEqual((None, node), next(gen)) | |||||
self.assertEqual((node.title, text1), next(gen)) | |||||
self.assertEqual((node.title, text2), next(gen)) | |||||
def test_children(self): | |||||
"""test Heading.__children__()""" | |||||
node = Heading(wrap([Text("foo"), Text("bar")]), 3) | |||||
gen = node.__children__() | |||||
self.assertEqual(node.title, next(gen)) | |||||
self.assertRaises(StopIteration, next, gen) | self.assertRaises(StopIteration, next, gen) | ||||
def test_strip(self): | def test_strip(self): | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -21,7 +21,11 @@ | |||||
# SOFTWARE. | # SOFTWARE. | ||||
from __future__ import unicode_literals | from __future__ import unicode_literals | ||||
import unittest | |||||
try: | |||||
import unittest2 as unittest | |||||
except ImportError: | |||||
import unittest | |||||
from mwparserfromhell.compat import str | from mwparserfromhell.compat import str | ||||
from mwparserfromhell.nodes import HTMLEntity | from mwparserfromhell.nodes import HTMLEntity | ||||
@@ -42,11 +46,10 @@ class TestHTMLEntity(TreeEqualityTestCase): | |||||
self.assertEqual("k", str(node3)) | self.assertEqual("k", str(node3)) | ||||
self.assertEqual("l", str(node4)) | self.assertEqual("l", str(node4)) | ||||
def test_iternodes(self): | |||||
"""test HTMLEntity.__iternodes__()""" | |||||
def test_children(self): | |||||
"""test HTMLEntity.__children__()""" | |||||
node = HTMLEntity("nbsp", named=True, hexadecimal=False) | node = HTMLEntity("nbsp", named=True, hexadecimal=False) | ||||
gen = node.__iternodes__(None) | |||||
self.assertEqual((None, node), next(gen)) | |||||
gen = node.__children__() | |||||
self.assertRaises(StopIteration, next, gen) | self.assertRaises(StopIteration, next, gen) | ||||
def test_strip(self): | def test_strip(self): | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -21,7 +21,11 @@ | |||||
# SOFTWARE. | # SOFTWARE. | ||||
from __future__ import unicode_literals | from __future__ import unicode_literals | ||||
import unittest | |||||
try: | |||||
import unittest2 as unittest | |||||
except ImportError: | |||||
import unittest | |||||
from mwparserfromhell.compat import str | from mwparserfromhell.compat import str | ||||
from mwparserfromhell.nodes import Text | from mwparserfromhell.nodes import Text | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -21,24 +21,30 @@ | |||||
# SOFTWARE. | # SOFTWARE. | ||||
from __future__ import unicode_literals | from __future__ import unicode_literals | ||||
import unittest | |||||
try: | |||||
import unittest2 as unittest | |||||
except ImportError: | |||||
import unittest | |||||
from mwparserfromhell import parser | from mwparserfromhell import parser | ||||
from mwparserfromhell.nodes import Template, Text, Wikilink | |||||
from mwparserfromhell.compat import range | |||||
from mwparserfromhell.nodes import Tag, Template, Text, Wikilink | |||||
from mwparserfromhell.nodes.extras import Parameter | from mwparserfromhell.nodes.extras import Parameter | ||||
from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext | from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext | ||||
from .compat import range | |||||
class TestParser(TreeEqualityTestCase): | class TestParser(TreeEqualityTestCase): | ||||
"""Tests for the Parser class itself, which tokenizes and builds nodes.""" | """Tests for the Parser class itself, which tokenizes and builds nodes.""" | ||||
def test_use_c(self): | def test_use_c(self): | ||||
"""make sure the correct tokenizer is used""" | """make sure the correct tokenizer is used""" | ||||
restore = parser.use_c | |||||
if parser.use_c: | if parser.use_c: | ||||
self.assertTrue(parser.Parser()._tokenizer.USES_C) | self.assertTrue(parser.Parser()._tokenizer.USES_C) | ||||
parser.use_c = False | parser.use_c = False | ||||
self.assertFalse(parser.Parser()._tokenizer.USES_C) | self.assertFalse(parser.Parser()._tokenizer.USES_C) | ||||
parser.use_c = restore | |||||
def test_parsing(self): | def test_parsing(self): | ||||
"""integration test for parsing overall""" | """integration test for parsing overall""" | ||||
@@ -62,5 +68,26 @@ class TestParser(TreeEqualityTestCase): | |||||
actual = parser.Parser().parse(text) | actual = parser.Parser().parse(text) | ||||
self.assertWikicodeEqual(expected, actual) | self.assertWikicodeEqual(expected, actual) | ||||
def test_skip_style_tags(self): | |||||
"""test Parser.parse(skip_style_tags=True)""" | |||||
def test(): | |||||
with_style = parser.Parser().parse(text, skip_style_tags=False) | |||||
without_style = parser.Parser().parse(text, skip_style_tags=True) | |||||
self.assertWikicodeEqual(a, with_style) | |||||
self.assertWikicodeEqual(b, without_style) | |||||
text = "This is an example with ''italics''!" | |||||
a = wrap([Text("This is an example with "), | |||||
Tag(wraptext("i"), wraptext("italics"), wiki_markup="''"), | |||||
Text("!")]) | |||||
b = wraptext("This is an example with ''italics''!") | |||||
restore = parser.use_c | |||||
if parser.use_c: | |||||
test() | |||||
parser.use_c = False | |||||
test() | |||||
parser.use_c = restore | |||||
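At the API level, the new argument is used like so (mirroring the test above)::

    from mwparserfromhell.parser import Parser

    text = "This is an example with ''italics''!"
    Parser().parse(text)                        # '' parsed into an <i> Tag
    Parser().parse(text, skip_style_tags=True)  # '' left as plain text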
if __name__ == "__main__": | if __name__ == "__main__": | ||||
unittest.main(verbosity=2) | unittest.main(verbosity=2) |
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -21,7 +21,11 @@ | |||||
# SOFTWARE. | # SOFTWARE. | ||||
from __future__ import unicode_literals | from __future__ import unicode_literals | ||||
import unittest | |||||
try: | |||||
import unittest2 as unittest | |||||
except ImportError: | |||||
import unittest | |||||
from mwparserfromhell.parser.tokenizer import Tokenizer | from mwparserfromhell.parser.tokenizer import Tokenizer | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -21,12 +21,14 @@ | |||||
# SOFTWARE. | # SOFTWARE. | ||||
from __future__ import unicode_literals | from __future__ import unicode_literals | ||||
import unittest | |||||
from mwparserfromhell.compat import py3k | |||||
from mwparserfromhell.smart_list import SmartList, _ListProxy | |||||
try: | |||||
import unittest2 as unittest | |||||
except ImportError: | |||||
import unittest | |||||
from .compat import range | |||||
from mwparserfromhell.compat import py3k, range | |||||
from mwparserfromhell.smart_list import SmartList, _ListProxy | |||||
class TestSmartList(unittest.TestCase): | class TestSmartList(unittest.TestCase): | ||||
"""Test cases for the SmartList class and its child, _ListProxy.""" | """Test cases for the SmartList class and its child, _ListProxy.""" | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -23,12 +23,14 @@ | |||||
from __future__ import unicode_literals | from __future__ import unicode_literals | ||||
from sys import getdefaultencoding | from sys import getdefaultencoding | ||||
from types import GeneratorType | from types import GeneratorType | ||||
import unittest | |||||
from mwparserfromhell.compat import bytes, py3k, py32, str | |||||
from mwparserfromhell.string_mixin import StringMixIn | |||||
try: | |||||
import unittest2 as unittest | |||||
except ImportError: | |||||
import unittest | |||||
from .compat import range | |||||
from mwparserfromhell.compat import bytes, py3k, py32, range, str | |||||
from mwparserfromhell.string_mixin import StringMixIn | |||||
class _FakeString(StringMixIn): | class _FakeString(StringMixIn): | ||||
def __init__(self, data): | def __init__(self, data): | ||||
@@ -59,8 +61,8 @@ class TestStringMixIn(unittest.TestCase): | |||||
else: | else: | ||||
methods.append("decode") | methods.append("decode") | ||||
for meth in methods: | for meth in methods: | ||||
expected = getattr(str, meth).__doc__ | |||||
actual = getattr(StringMixIn, meth).__doc__ | |||||
expected = getattr("foo", meth).__doc__ | |||||
actual = getattr(_FakeString("foo"), meth).__doc__ | |||||
self.assertEqual(expected, actual) | self.assertEqual(expected, actual) | ||||
def test_types(self): | def test_types(self): | ||||
@@ -109,12 +111,12 @@ class TestStringMixIn(unittest.TestCase): | |||||
self.assertFalse(str1 < str4) | self.assertFalse(str1 < str4) | ||||
self.assertTrue(str1 <= str4) | self.assertTrue(str1 <= str4) | ||||
self.assertTrue(str1 > str5) | |||||
self.assertTrue(str1 >= str5) | |||||
self.assertFalse(str1 == str5) | |||||
self.assertTrue(str1 != str5) | |||||
self.assertFalse(str1 < str5) | |||||
self.assertFalse(str1 <= str5) | |||||
self.assertFalse(str5 > str1) | |||||
self.assertFalse(str5 >= str1) | |||||
self.assertFalse(str5 == str1) | |||||
self.assertTrue(str5 != str1) | |||||
self.assertTrue(str5 < str1) | |||||
self.assertTrue(str5 <= str1) | |||||
def test_other_magics(self): | def test_other_magics(self): | ||||
"""test other magically implemented features, like len() and iter()""" | """test other magically implemented features, like len() and iter()""" | ||||
@@ -376,7 +378,7 @@ class TestStringMixIn(unittest.TestCase): | |||||
self.assertEqual(actual, str25.rsplit(None, 3)) | self.assertEqual(actual, str25.rsplit(None, 3)) | ||||
actual = [" this is a sentence with", "", "whitespace", ""] | actual = [" this is a sentence with", "", "whitespace", ""] | ||||
self.assertEqual(actual, str25.rsplit(" ", 3)) | self.assertEqual(actual, str25.rsplit(" ", 3)) | ||||
if py3k: | |||||
if py3k and not py32: | |||||
actual = [" this is a", "sentence", "with", "whitespace"] | actual = [" this is a", "sentence", "with", "whitespace"] | ||||
self.assertEqual(actual, str25.rsplit(maxsplit=3)) | self.assertEqual(actual, str25.rsplit(maxsplit=3)) | ||||
@@ -394,7 +396,7 @@ class TestStringMixIn(unittest.TestCase): | |||||
self.assertEqual(actual, str25.split(None, 3)) | self.assertEqual(actual, str25.split(None, 3)) | ||||
actual = ["", "", "", "this is a sentence with whitespace "] | actual = ["", "", "", "this is a sentence with whitespace "] | ||||
self.assertEqual(actual, str25.split(" ", 3)) | self.assertEqual(actual, str25.split(" ", 3)) | ||||
if py3k: | |||||
if py3k and not py32: | |||||
actual = ["this", "is", "a", "sentence with whitespace "] | actual = ["this", "is", "a", "sentence with whitespace "] | ||||
self.assertEqual(actual, str25.split(maxsplit=3)) | self.assertEqual(actual, str25.split(maxsplit=3)) | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net> | |||||
# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -21,12 +21,16 @@ | |||||
# SOFTWARE. | # SOFTWARE. | ||||
from __future__ import unicode_literals | from __future__ import unicode_literals | ||||
import unittest | |||||
try: | |||||
import unittest2 as unittest | |||||
except ImportError: | |||||
import unittest | |||||
from mwparserfromhell.compat import str | from mwparserfromhell.compat import str | ||||
from mwparserfromhell.nodes import Tag, Template, Text | from mwparserfromhell.nodes import Tag, Template, Text | ||||
from mwparserfromhell.nodes.extras import Attribute | from mwparserfromhell.nodes.extras import Attribute | ||||
from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext | |||||
from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext | |||||
agen = lambda name, value: Attribute(wraptext(name), wraptext(value)) | agen = lambda name, value: Attribute(wraptext(name), wraptext(value)) | ||||
agennq = lambda name, value: Attribute(wraptext(name), wraptext(value), False) | agennq = lambda name, value: Attribute(wraptext(name), wraptext(value), False) | ||||
@@ -64,37 +68,30 @@ class TestTag(TreeEqualityTestCase):
         self.assertEqual("----", str(node8))
         self.assertEqual("''italics!''", str(node9))

-    def test_iternodes(self):
-        """test Tag.__iternodes__()"""
-        node1n1, node1n2 = Text("ref"), Text("foobar")
-        node2n1, node3n1, node3n2 = Text("bold text"), Text("img"), Text("id")
-        node3n3, node3n4, node3n5 = Text("foo"), Text("class"), Text("bar")
+    def test_children(self):
+        """test Tag.__children__()"""
         # <ref>foobar</ref>
-        node1 = Tag(wrap([node1n1]), wrap([node1n2]))
+        node1 = Tag(wraptext("ref"), wraptext("foobar"))
         # '''bold text'''
-        node2 = Tag(wraptext("b"), wrap([node2n1]), wiki_markup="'''")
+        node2 = Tag(wraptext("b"), wraptext("bold text"), wiki_markup="'''")
         # <img id="foo" class="bar" />
-        node3 = Tag(wrap([node3n1]),
-                    attrs=[Attribute(wrap([node3n2]), wrap([node3n3])),
-                           Attribute(wrap([node3n4]), wrap([node3n5]))],
+        node3 = Tag(wraptext("img"),
+                    attrs=[Attribute(wraptext("id"), wraptext("foo")),
+                           Attribute(wraptext("class"), wraptext("bar"))],
                     self_closing=True, padding=" ")

-        gen1 = node1.__iternodes__(getnodes)
-        gen2 = node2.__iternodes__(getnodes)
-        gen3 = node3.__iternodes__(getnodes)
-        self.assertEqual((None, node1), next(gen1))
-        self.assertEqual((None, node2), next(gen2))
-        self.assertEqual((None, node3), next(gen3))
-        self.assertEqual((node1.tag, node1n1), next(gen1))
-        self.assertEqual((node3.tag, node3n1), next(gen3))
-        self.assertEqual((node3.attributes[0].name, node3n2), next(gen3))
-        self.assertEqual((node3.attributes[0].value, node3n3), next(gen3))
-        self.assertEqual((node3.attributes[1].name, node3n4), next(gen3))
-        self.assertEqual((node3.attributes[1].value, node3n5), next(gen3))
-        self.assertEqual((node1.contents, node1n2), next(gen1))
-        self.assertEqual((node2.contents, node2n1), next(gen2))
-        self.assertEqual((node1.closing_tag, node1n1), next(gen1))
+        gen1 = node1.__children__()
+        gen2 = node2.__children__()
+        gen3 = node3.__children__()
+        self.assertEqual(node1.tag, next(gen1))
+        self.assertEqual(node3.tag, next(gen3))
+        self.assertEqual(node3.attributes[0].name, next(gen3))
+        self.assertEqual(node3.attributes[0].value, next(gen3))
+        self.assertEqual(node3.attributes[1].name, next(gen3))
+        self.assertEqual(node3.attributes[1].value, next(gen3))
+        self.assertEqual(node1.contents, next(gen1))
+        self.assertEqual(node2.contents, next(gen2))
+        self.assertEqual(node1.closing_tag, next(gen1))
         self.assertRaises(StopIteration, next, gen1)
         self.assertRaises(StopIteration, next, gen2)
         self.assertRaises(StopIteration, next, gen3)
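Where the old __iternodes__(getter) walked the whole subtree and yielded
(parent, node) pairs, the new __children__() protocol is a plain generator over a
node's direct Wikicode attributes, in order, taking no arguments. A minimal sketch
of the contract, reusing the wraptext() helper imported above:

    >>> node = Tag(wraptext("ref"), wraptext("foobar"))
    >>> gen = node.__children__()
    >>> next(gen) == node.tag, next(gen) == node.contents
    (True, True)
    >>> next(gen) == node.closing_tag   # after this, StopIteration
    True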
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -21,12 +21,16 @@
 # SOFTWARE.

 from __future__ import unicode_literals
-import unittest
+
+try:
+    import unittest2 as unittest
+except ImportError:
+    import unittest

 from mwparserfromhell.compat import str
 from mwparserfromhell.nodes import HTMLEntity, Template, Text
 from mwparserfromhell.nodes.extras import Parameter
-from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext
+from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext

 pgens = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=True)
 pgenh = lambda k, v: Parameter(wraptext(k), wraptext(v), showkey=False)
@@ -42,27 +46,21 @@ class TestTemplate(TreeEqualityTestCase):
                          [pgenh("1", "bar"), pgens("abc", "def")])
         self.assertEqual("{{foo|bar|abc=def}}", str(node2))

-    def test_iternodes(self):
-        """test Template.__iternodes__()"""
-        node1n1 = Text("foobar")
-        node2n1, node2n2, node2n3 = Text("foo"), Text("bar"), Text("abc")
-        node2n4, node2n5 = Text("def"), Text("ghi")
-        node2p1 = Parameter(wraptext("1"), wrap([node2n2]), showkey=False)
-        node2p2 = Parameter(wrap([node2n3]), wrap([node2n4, node2n5]),
+    def test_children(self):
+        """test Template.__children__()"""
+        node2p1 = Parameter(wraptext("1"), wraptext("bar"), showkey=False)
+        node2p2 = Parameter(wraptext("abc"), wrap([Text("def"), Text("ghi")]),
                             showkey=True)
-        node1 = Template(wrap([node1n1]))
-        node2 = Template(wrap([node2n1]), [node2p1, node2p2])
+        node1 = Template(wraptext("foobar"))
+        node2 = Template(wraptext("foo"), [node2p1, node2p2])

-        gen1 = node1.__iternodes__(getnodes)
-        gen2 = node2.__iternodes__(getnodes)
-        self.assertEqual((None, node1), next(gen1))
-        self.assertEqual((None, node2), next(gen2))
-        self.assertEqual((node1.name, node1n1), next(gen1))
-        self.assertEqual((node2.name, node2n1), next(gen2))
-        self.assertEqual((node2.params[0].value, node2n2), next(gen2))
-        self.assertEqual((node2.params[1].name, node2n3), next(gen2))
-        self.assertEqual((node2.params[1].value, node2n4), next(gen2))
-        self.assertEqual((node2.params[1].value, node2n5), next(gen2))
+        gen1 = node1.__children__()
+        gen2 = node2.__children__()
+        self.assertEqual(node1.name, next(gen1))
+        self.assertEqual(node2.name, next(gen2))
+        self.assertEqual(node2.params[0].value, next(gen2))
+        self.assertEqual(node2.params[1].name, next(gen2))
+        self.assertEqual(node2.params[1].value, next(gen2))
         self.assertRaises(StopIteration, next, gen1)
         self.assertRaises(StopIteration, next, gen2)
@@ -123,15 +121,15 @@ class TestTemplate(TreeEqualityTestCase):
         node3 = Template(wraptext("foo"),
                          [pgenh("1", "a"), pgens("b", "c"), pgens("1", "d")])
         node4 = Template(wraptext("foo"), [pgenh("1", "a"), pgens("b", " ")])
-        self.assertFalse(node1.has("foobar"))
-        self.assertTrue(node2.has(1))
-        self.assertTrue(node2.has("abc"))
-        self.assertFalse(node2.has("def"))
-        self.assertTrue(node3.has("1"))
-        self.assertTrue(node3.has(" b "))
-        self.assertFalse(node4.has("b"))
-        self.assertTrue(node3.has("b", False))
+        self.assertFalse(node1.has("foobar", False))
+        self.assertTrue(node2.has(1, False))
+        self.assertTrue(node2.has("abc", False))
+        self.assertFalse(node2.has("def", False))
+        self.assertTrue(node3.has("1", False))
+        self.assertTrue(node3.has(" b ", False))
         self.assertTrue(node4.has("b", False))
+        self.assertTrue(node3.has("b", True))
+        self.assertFalse(node4.has("b", True))

     def test_get(self):
         """test Template.get()"""
@@ -223,6 +221,7 @@ class TestTemplate(TreeEqualityTestCase):
                                            pgenh("1", "c"), pgenh("2", "d")])
         node40 = Template(wraptext("a"), [pgens("b", "c"), pgens("d", "e"),
                                           pgens("f", "g")])
+        node41 = Template(wraptext("a"), [pgenh("1", "")])

         node1.add("e", "f", showkey=True)
         node2.add(2, "g", showkey=False)
@@ -266,6 +265,7 @@ class TestTemplate(TreeEqualityTestCase):
         node38.add("1", "e")
         node39.add("1", "e")
         node40.add("d", "h", before="b")
+        node41.add(1, "b")

         self.assertEqual("{{a|b=c|d|e=f}}", node1)
         self.assertEqual("{{a|b=c|d|g}}", node2)
@@ -312,6 +312,7 @@ class TestTemplate(TreeEqualityTestCase):
         self.assertEqual("{{a|1=e|x=y|2=d}}", node38)
         self.assertEqual("{{a|x=y|e|d}}", node39)
         self.assertEqual("{{a|b=c|d=h|f=g}}", node40)
+        self.assertEqual("{{a|b}}", node41)

     def test_remove(self):
         """test Template.remove()"""
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -21,7 +21,11 @@
 # SOFTWARE.

 from __future__ import unicode_literals
-import unittest
+
+try:
+    import unittest2 as unittest
+except ImportError:
+    import unittest

 from mwparserfromhell.compat import str
 from mwparserfromhell.nodes import Text
@@ -36,11 +40,10 @@ class TestText(unittest.TestCase):
         node2 = Text("fóóbar")
         self.assertEqual("fóóbar", str(node2))

-    def test_iternodes(self):
-        """test Text.__iternodes__()"""
+    def test_children(self):
+        """test Text.__children__()"""
         node = Text("foobar")
-        gen = node.__iternodes__(None)
-        self.assertEqual((None, node), next(gen))
+        gen = node.__children__()
         self.assertRaises(StopIteration, next, gen)

     def test_strip(self):
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -21,7 +21,11 @@
 # SOFTWARE.

 from __future__ import unicode_literals
-import unittest
+
+try:
+    import unittest2 as unittest
+except ImportError:
+    import unittest

 from mwparserfromhell.compat import py3k
 from mwparserfromhell.parser import tokens
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -21,7 +21,11 @@
 # SOFTWARE.

 from __future__ import unicode_literals
-import unittest
+
+try:
+    import unittest2 as unittest
+except ImportError:
+    import unittest

 from mwparserfromhell.nodes import Template, Text
 from mwparserfromhell.utils import parse_anything
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -24,14 +24,18 @@ from __future__ import unicode_literals
 from functools import partial
 import re
 from types import GeneratorType
-import unittest
+
+try:
+    import unittest2 as unittest
+except ImportError:
+    import unittest

+from mwparserfromhell.compat import py3k, str
 from mwparserfromhell.nodes import (Argument, Comment, Heading, HTMLEntity,
                                     Node, Tag, Template, Text, Wikilink)
 from mwparserfromhell.smart_list import SmartList
 from mwparserfromhell.wikicode import Wikicode
 from mwparserfromhell import parse
-from mwparserfromhell.compat import py3k, str

 from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext
@@ -242,6 +246,7 @@ class TestWikicode(TreeEqualityTestCase):
         """test Wikicode.matches()"""
         code1 = parse("Cleanup")
         code2 = parse("\nstub<!-- TODO: make more specific -->")
+        code3 = parse("")
         self.assertTrue(code1.matches("Cleanup"))
         self.assertTrue(code1.matches("cleanup"))
         self.assertTrue(code1.matches(" cleanup\n"))
@@ -250,13 +255,22 @@ class TestWikicode(TreeEqualityTestCase):
         self.assertTrue(code2.matches("stub"))
         self.assertTrue(code2.matches("Stub<!-- no, it's fine! -->"))
         self.assertFalse(code2.matches("StuB"))
+        self.assertTrue(code1.matches(("cleanup", "stub")))
+        self.assertTrue(code2.matches(("cleanup", "stub")))
+        self.assertFalse(code2.matches(("StuB", "sTUb", "foobar")))
+        self.assertFalse(code2.matches(["StuB", "sTUb", "foobar"]))
+        self.assertTrue(code2.matches(("StuB", "sTUb", "foo", "bar", "Stub")))
+        self.assertTrue(code2.matches(["StuB", "sTUb", "foo", "bar", "Stub"]))
+        self.assertTrue(code3.matches(""))
+        self.assertTrue(code3.matches("<!-- nothing -->"))
+        self.assertTrue(code3.matches(("a", "b", "")))

     def test_filter_family(self):
         """test the Wikicode.i?filter() family of functions"""
         def genlist(gen):
             self.assertIsInstance(gen, GeneratorType)
             return list(gen)
-        ifilter = lambda code: (lambda **kw: genlist(code.ifilter(**kw)))
+        ifilter = lambda code: (lambda *a, **k: genlist(code.ifilter(*a, **k)))

         code = parse("a{{b}}c[[d]]{{{e}}}{{f}}[[g]]")
         for func in (code.filter, ifilter(code)):
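Per the new assertions above, Wikicode.matches() now takes a tuple or list of
candidates and succeeds if any single element matches, with the same normalization
(first-letter case, surrounding whitespace, comments) applied per element. A sketch
mirroring the code2 cases:

    >>> import mwparserfromhell
    >>> code = mwparserfromhell.parse("\nstub<!-- TODO: make more specific -->")
    >>> code.matches(("cleanup", "stub"))
    True
    >>> code.matches(["StuB", "sTUb", "foobar"])
    False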
@@ -292,21 +306,27 @@ class TestWikicode(TreeEqualityTestCase):
                           "{{c|d={{f}}{{h}}}}", "{{f}}", "{{h}}"],
                          func(recursive=True, forcetype=Template))

-        code3 = parse("{{foobar}}{{FOO}}{{baz}}{{bz}}")
+        code3 = parse("{{foobar}}{{FOO}}{{baz}}{{bz}}{{barfoo}}")
         for func in (code3.filter, ifilter(code3)):
-            self.assertEqual(["{{foobar}}", "{{FOO}}"], func(recursive=False, matches=r"foo"))
+            self.assertEqual(["{{foobar}}", "{{barfoo}}"],
+                             func(False, matches=lambda node: "foo" in node))
+            self.assertEqual(["{{foobar}}", "{{FOO}}", "{{barfoo}}"],
+                             func(False, matches=r"foo"))
             self.assertEqual(["{{foobar}}", "{{FOO}}"],
-                             func(recursive=False, matches=r"^{{foo.*?}}"))
+                             func(matches=r"^{{foo.*?}}"))
             self.assertEqual(["{{foobar}}"],
-                             func(recursive=False, matches=r"^{{foo.*?}}", flags=re.UNICODE))
-            self.assertEqual(["{{baz}}", "{{bz}}"], func(recursive=False, matches=r"^{{b.*?z"))
-            self.assertEqual(["{{baz}}"], func(recursive=False, matches=r"^{{b.+?z}}"))
+                             func(matches=r"^{{foo.*?}}", flags=re.UNICODE))
+            self.assertEqual(["{{baz}}", "{{bz}}"], func(matches=r"^{{b.*?z"))
+            self.assertEqual(["{{baz}}"], func(matches=r"^{{b.+?z}}"))

         self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}"],
                          code2.filter_templates(recursive=False))
         self.assertEqual(["{{a|{{b}}|{{c|d={{f}}{{h}}}}}}", "{{b}}",
                           "{{c|d={{f}}{{h}}}}", "{{f}}", "{{h}}"],
                          code2.filter_templates(recursive=True))
+        self.assertEqual(["{{foobar}}"], code3.filter_templates(
+            matches=lambda node: node.name.matches("Foobar")))
         self.assertEqual(["{{baz}}", "{{bz}}"],
                          code3.filter_templates(matches=r"^{{b.*?z"))
         self.assertEqual([], code3.filter_tags(matches=r"^{{b.*?z"))
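With the broadened *matches* argument, the same filter call accepts either a regex
(searched case-insensitively against each node's string form unless *flags*
overrides it, as the re.UNICODE case above shows) or an arbitrary predicate over the
Node. A sketch using the names from these tests:

    >>> code = mwparserfromhell.parse("{{foobar}}{{FOO}}{{baz}}")
    >>> code.filter_templates(matches=r"foo") == ["{{foobar}}", "{{FOO}}"]
    True
    >>> code.filter_templates(matches=lambda n: n.name.matches("Foobar")) == ["{{foobar}}"]
    True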
@@ -335,35 +355,43 @@ class TestWikicode(TreeEqualityTestCase):
         p4_III = "== Section III ==\n" + p4_IIIA
         page4 = parse(p4_lead + p4_I + p4_II + p4_III)

-        self.assertEqual([], page1.get_sections())
+        self.assertEqual([""], page1.get_sections())
         self.assertEqual(["", "==Heading=="], page2.get_sections())
         self.assertEqual(["", "===Heading===\nFoo bar baz\n====Gnidaeh====\n",
                           "====Gnidaeh====\n"], page3.get_sections())
-        self.assertEqual([p4_lead, p4_IA, p4_I, p4_IB, p4_IB1, p4_II,
-                          p4_IIIA1a, p4_III, p4_IIIA, p4_IIIA2, p4_IIIA2ai1],
+        self.assertEqual([p4_lead, p4_I, p4_IA, p4_IB, p4_IB1, p4_II,
+                          p4_III, p4_IIIA, p4_IIIA1a, p4_IIIA2, p4_IIIA2ai1],
                          page4.get_sections())
         self.assertEqual(["====Gnidaeh====\n"], page3.get_sections(levels=[4]))
         self.assertEqual(["===Heading===\nFoo bar baz\n====Gnidaeh====\n"],
                          page3.get_sections(levels=(2, 3)))
+        self.assertEqual(["===Heading===\nFoo bar baz\n"],
+                         page3.get_sections(levels=(2, 3), flat=True))
         self.assertEqual([], page3.get_sections(levels=[0]))
         self.assertEqual(["", "====Gnidaeh====\n"],
                          page3.get_sections(levels=[4], include_lead=True))
         self.assertEqual(["===Heading===\nFoo bar baz\n====Gnidaeh====\n",
                           "====Gnidaeh====\n"],
                          page3.get_sections(include_lead=False))
+        self.assertEqual(["===Heading===\nFoo bar baz\n", "====Gnidaeh====\n"],
+                         page3.get_sections(flat=True, include_lead=False))
         self.assertEqual([p4_IB1, p4_IIIA2], page4.get_sections(levels=[4]))
-        self.assertEqual([""], page2.get_sections(include_headings=False))
+        self.assertEqual([p4_IA, p4_IB, p4_IIIA], page4.get_sections(levels=[3]))
+        self.assertEqual([p4_IA, "=== Section I.B ===\n",
+                          "=== Section III.A ===\nText.\n"],
+                         page4.get_sections(levels=[3], flat=True))
+        self.assertEqual(["", ""], page2.get_sections(include_headings=False))
         self.assertEqual(["\nSection I.B.1 body.\n\n•Some content.\n\n",
                           "\nEven more text.\n" + p4_IIIA2ai1],
                          page4.get_sections(levels=[4],
                                             include_headings=False))
         self.assertEqual([], page4.get_sections(matches=r"body"))
-        self.assertEqual([p4_IA, p4_I, p4_IB, p4_IB1],
+        self.assertEqual([p4_I, p4_IA, p4_IB, p4_IB1],
                          page4.get_sections(matches=r"Section\sI[.\s].*?"))
-        self.assertEqual([p4_IA, p4_IIIA1a, p4_IIIA, p4_IIIA2, p4_IIIA2ai1],
+        self.assertEqual([p4_IA, p4_IIIA, p4_IIIA1a, p4_IIIA2, p4_IIIA2ai1],
                          page4.get_sections(matches=r".*?a.*?"))
         self.assertEqual([p4_IIIA1a, p4_IIIA2ai1],
                          page4.get_sections(matches=r".*?a.*?", flags=re.U))
@@ -371,6 +399,11 @@ class TestWikicode(TreeEqualityTestCase):
                          page4.get_sections(matches=r".*?a.*?", flags=re.U,
                                             include_headings=False))

+        sections = page2.get_sections(include_headings=False)
+        sections[0].append("Lead!\n")
+        sections[1].append("\nFirst section!")
+        self.assertEqual("Lead!\n==Heading==\nFirst section!", page2)
+
         page5 = parse("X\n== Foo ==\nBar\n== Baz ==\nBuzz")
         section = page5.get_sections(matches="Foo")[0]
         section.replace("\nBar\n", "\nBarf ")
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
+# Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -21,12 +21,16 @@
 # SOFTWARE.

 from __future__ import unicode_literals
-import unittest
+
+try:
+    import unittest2 as unittest
+except ImportError:
+    import unittest

 from mwparserfromhell.compat import str
 from mwparserfromhell.nodes import Text, Wikilink
-from ._test_tree_equality import TreeEqualityTestCase, getnodes, wrap, wraptext
+from ._test_tree_equality import TreeEqualityTestCase, wrap, wraptext

 class TestWikilink(TreeEqualityTestCase):
     """Test cases for the Wikilink node."""
@@ -38,20 +42,15 @@ class TestWikilink(TreeEqualityTestCase):
         node2 = Wikilink(wraptext("foo"), wraptext("bar"))
         self.assertEqual("[[foo|bar]]", str(node2))

-    def test_iternodes(self):
-        """test Wikilink.__iternodes__()"""
-        node1n1 = Text("foobar")
-        node2n1, node2n2, node2n3 = Text("foo"), Text("bar"), Text("baz")
-        node1 = Wikilink(wrap([node1n1]))
-        node2 = Wikilink(wrap([node2n1]), wrap([node2n2, node2n3]))
-        gen1 = node1.__iternodes__(getnodes)
-        gen2 = node2.__iternodes__(getnodes)
-        self.assertEqual((None, node1), next(gen1))
-        self.assertEqual((None, node2), next(gen2))
-        self.assertEqual((node1.title, node1n1), next(gen1))
-        self.assertEqual((node2.title, node2n1), next(gen2))
-        self.assertEqual((node2.text, node2n2), next(gen2))
-        self.assertEqual((node2.text, node2n3), next(gen2))
+    def test_children(self):
+        """test Wikilink.__children__()"""
+        node1 = Wikilink(wraptext("foobar"))
+        node2 = Wikilink(wraptext("foo"), wrap([Text("bar"), Text("baz")]))
+        gen1 = node1.__children__()
+        gen2 = node2.__children__()
+        self.assertEqual(node1.title, next(gen1))
+        self.assertEqual(node2.title, next(gen2))
+        self.assertEqual(node2.text, next(gen2))
         self.assertRaises(StopIteration, next, gen1)
         self.assertRaises(StopIteration, next, gen2)
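Note that __children__() only yields the attributes a node actually has: a wikilink
built without link text yields just its title, as gen1 above checks. Sketch, reusing
the test helpers:

    >>> link = Wikilink(wraptext("foobar"))
    >>> [str(child) for child in link.__children__()]
    ['foobar']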
@@ -150,3 +150,31 @@ name: comment_inside_bracketed_link
 label: an HTML comment inside a bracketed external link
 input: "[http://example.com/foo<!--comment-->bar]"
 output: [ExternalLinkOpen(brackets=True), Text(text="http://example.com/foo"), CommentStart(), Text(text="comment"), CommentEnd(), Text(text="bar"), ExternalLinkClose()]
+
+---
+
+name: wikilink_inside_external_link
+label: a wikilink inside an external link, which the parser considers valid (see issue #61)
+input: "[http://example.com/foo Foo [[Bar]]]"
+output: [ExternalLinkOpen(brackets=True), Text(text="http://example.com/foo"), ExternalLinkSeparator(), Text(text="Foo "), WikilinkOpen(), Text(text="Bar"), WikilinkClose(), ExternalLinkClose()]
+
+---
+
+name: external_link_inside_wikilink
+label: an external link inside a wikilink, valid in the case of images (see issue #62)
+input: "[[File:Example.png|thumb|http://example.com]]"
+output: [WikilinkOpen(), Text(text="File:Example.png"), WikilinkSeparator(), Text(text="thumb|"), ExternalLinkOpen(brackets=False), Text(text="http://example.com"), ExternalLinkClose(), WikilinkClose()]
+
+---
+
+name: external_link_inside_wikilink_brackets
+label: an external link with brackets inside a wikilink
+input: "[[File:Example.png|thumb|[http://example.com Example]]]"
+output: [WikilinkOpen(), Text(text="File:Example.png"), WikilinkSeparator(), Text(text="thumb|"), ExternalLinkOpen(brackets=True), Text(text="http://example.com"), ExternalLinkSeparator(), Text(text="Example"), ExternalLinkClose(), WikilinkClose()]
+
+---
+
+name: external_link_inside_wikilink_title
+label: an external link inside a wikilink title, which is invalid
+input: "[[File:Example.png http://example.com]]"
+output: [WikilinkOpen(), Text(text="File:Example.png http://example.com"), WikilinkClose()]
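At the API level, the first fixture's fix means the nested wikilink surfaces as a
real node. A sketch, assuming a build with the external-link tokenization exercised
here:

    >>> code = mwparserfromhell.parse("[http://example.com/foo Foo [[Bar]]]")
    >>> code.filter_wikilinks() == ["[[Bar]]"]
    True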
@@ -54,6 +54,20 @@ output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), Text(text="bar[b

 ---

+name: nested
+label: a wikilink nested within another
+input: "[[foo|[[bar]]]]"
+output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), WikilinkOpen(), Text(text="bar"), WikilinkClose(), WikilinkClose()]
+
+---
+
+name: nested_padding
+label: a wikilink nested within another, separated by other data
+input: "[[foo|a[[b]]c]]"
+output: [WikilinkOpen(), Text(text="foo"), WikilinkSeparator(), Text(text="a"), WikilinkOpen(), Text(text="b"), WikilinkClose(), Text(text="c"), WikilinkClose()]
+
+---
+
 name: invalid_newline
 label: invalid wikilink: newline as only content
 input: "[[\n]]"
@@ -103,27 +117,13 @@ output: [Text(text="[[foo"), WikilinkOpen(), Text(text="bar"), WikilinkClose(),

 ---

-name: invalid_nested_text
-label: invalid wikilink: a wikilink nested within the value of another
+name: invalid_nested_no_close
+label: invalid wikilink: a wikilink nested within the value of another, missing a pair of closing brackets
 input: "[[foo|[[bar]]"
 output: [Text(text="[[foo|"), WikilinkOpen(), Text(text="bar"), WikilinkClose()]

 ---

-name: invalid_nested_text_2
-label: invalid wikilink: a wikilink nested within the value of another, two pairs of closing brackets
-input: "[[foo|[[bar]]]]"
-output: [Text(text="[[foo|"), WikilinkOpen(), Text(text="bar"), WikilinkClose(), Text(text="]]")]
-
----
-
-name: invalid_nested_text_padding
-label: invalid wikilink: a wikilink nested within the value of another, separated by other data
-input: "[[foo|a[[b]]c]]"
-output: [Text(text="[[foo|a"), WikilinkOpen(), Text(text="b"), WikilinkClose(), Text(text="c]]")]
-
----
-
 name: incomplete_open_only
 label: incomplete wikilinks: just an open
 input: "[["