A Python parser for MediaWiki wikicode https://mwparserfromhell.readthedocs.io/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

368 lines
11 KiB

  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a copy
  6. # of this software and associated documentation files (the "Software"), to deal
  7. # in the Software without restriction, including without limitation the rights
  8. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. # copies of the Software, and to permit persons to whom the Software is
  10. # furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. # SOFTWARE.
  22. """
  23. This module contains the :py:class:`~.StringMixIn` type, which implements the
  24. interface for the ``unicode`` type (``str`` on py3k) in a dynamic manner.
  25. """
  26. from __future__ import unicode_literals
  27. from .compat import py3k, str
  28. __all__ = ["StringMixIn"]
  29. def inheritdoc(method):
  30. """Set __doc__ of *method* to __doc__ of *method* in its parent class.
  31. Since this is used on :py:class:`~.StringMixIn`, the "parent class" used is
  32. ``str``. This function can be used as a decorator.
  33. """
  34. method.__doc__ = getattr(str, method.__name__).__doc__
  35. return method
  36. class StringMixIn(object):
  37. """Implement the interface for ``unicode``/``str`` in a dynamic manner.
  38. To use this class, inherit from it and override the :py:meth:`__unicode__`
  39. method (same on py3k) to return the string representation of the object.
  40. The various string methods will operate on the value of
  41. :py:meth:`__unicode__` instead of the immutable ``self`` like the regular
  42. ``str`` type.
  43. """
  44. if py3k:
  45. def __str__(self):
  46. return self.__unicode__()
  47. def __bytes__(self):
  48. return self.__unicode__().encode("utf8")
  49. else:
  50. def __str__(self):
  51. return self.__unicode__().encode("utf8")
  52. def __unicode__(self):
  53. raise NotImplementedError()
  54. def __repr__(self):
  55. return repr(self.__unicode__())
  56. def __lt__(self, other):
  57. if isinstance(other, StringMixIn):
  58. return self.__unicode__() < other.__unicode__()
  59. return self.__unicode__() < other
  60. def __le__(self, other):
  61. if isinstance(other, StringMixIn):
  62. return self.__unicode__() <= other.__unicode__()
  63. return self.__unicode__() <= other
  64. def __eq__(self, other):
  65. if isinstance(other, StringMixIn):
  66. return self.__unicode__() == other.__unicode__()
  67. return self.__unicode__() == other
  68. def __ne__(self, other):
  69. if isinstance(other, StringMixIn):
  70. return self.__unicode__() != other.__unicode__()
  71. return self.__unicode__() != other
  72. def __gt__(self, other):
  73. if isinstance(other, StringMixIn):
  74. return self.__unicode__() > other.__unicode__()
  75. return self.__unicode__() > other
  76. def __ge__(self, other):
  77. if isinstance(other, StringMixIn):
  78. return self.__unicode__() >= other.__unicode__()
  79. return self.__unicode__() >= other
  80. if py3k:
  81. def __bool__(self):
  82. return bool(self.__unicode__())
  83. else:
  84. def __nonzero__(self):
  85. return bool(self.__unicode__())
  86. def __len__(self):
  87. return len(self.__unicode__())
  88. def __iter__(self):
  89. for char in self.__unicode__():
  90. yield char
  91. def __getitem__(self, key):
  92. return self.__unicode__()[key]
  93. def __reversed__(self):
  94. return reversed(self.__unicode__())
  95. def __contains__(self, item):
  96. if isinstance(item, StringMixIn):
  97. return str(item) in self.__unicode__()
  98. return item in self.__unicode__()
  99. @inheritdoc
  100. def capitalize(self):
  101. return self.__unicode__().capitalize()
  102. if py3k:
  103. @inheritdoc
  104. def casefold(self):
  105. return self.__unicode__().casefold()
  106. @inheritdoc
  107. def center(self, width, fillchar=None):
  108. if fillchar is None:
  109. return self.__unicode__().center(width)
  110. return self.__unicode__().center(width, fillchar)
  111. @inheritdoc
  112. def count(self, sub, start=None, end=None):
  113. return self.__unicode__().count(sub, start, end)
  114. if not py3k:
  115. @inheritdoc
  116. def decode(self, encoding=None, errors=None):
  117. kwargs = {}
  118. if encoding is not None:
  119. kwargs["encoding"] = encoding
  120. if errors is not None:
  121. kwargs["errors"] = errors
  122. return self.__unicode__().decode(**kwargs)
  123. @inheritdoc
  124. def encode(self, encoding=None, errors=None):
  125. kwargs = {}
  126. if encoding is not None:
  127. kwargs["encoding"] = encoding
  128. if errors is not None:
  129. kwargs["errors"] = errors
  130. return self.__unicode__().encode(**kwargs)
  131. @inheritdoc
  132. def endswith(self, prefix, start=None, end=None):
  133. return self.__unicode__().endswith(prefix, start, end)
  134. @inheritdoc
  135. def expandtabs(self, tabsize=None):
  136. if tabsize is None:
  137. return self.__unicode__().expandtabs()
  138. return self.__unicode__().expandtabs(tabsize)
  139. @inheritdoc
  140. def find(self, sub, start=None, end=None):
  141. return self.__unicode__().find(sub, start, end)
  142. @inheritdoc
  143. def format(self, *args, **kwargs):
  144. return self.__unicode__().format(*args, **kwargs)
  145. if py3k:
  146. @inheritdoc
  147. def format_map(self, mapping):
  148. return self.__unicode__().format_map(mapping)
  149. @inheritdoc
  150. def index(self, sub, start=None, end=None):
  151. return self.__unicode__().index(sub, start, end)
  152. @inheritdoc
  153. def isalnum(self):
  154. return self.__unicode__().isalnum()
  155. @inheritdoc
  156. def isalpha(self):
  157. return self.__unicode__().isalpha()
  158. @inheritdoc
  159. def isdecimal(self):
  160. return self.__unicode__().isdecimal()
  161. @inheritdoc
  162. def isdigit(self):
  163. return self.__unicode__().isdigit()
  164. if py3k:
  165. @inheritdoc
  166. def isidentifier(self):
  167. return self.__unicode__().isidentifier()
  168. @inheritdoc
  169. def islower(self):
  170. return self.__unicode__().islower()
  171. @inheritdoc
  172. def isnumeric(self):
  173. return self.__unicode__().isnumeric()
  174. if py3k:
  175. @inheritdoc
  176. def isprintable(self):
  177. return self.__unicode__().isprintable()
  178. @inheritdoc
  179. def isspace(self):
  180. return self.__unicode__().isspace()
  181. @inheritdoc
  182. def istitle(self):
  183. return self.__unicode__().istitle()
  184. @inheritdoc
  185. def isupper(self):
  186. return self.__unicode__().isupper()
  187. @inheritdoc
  188. def join(self, iterable):
  189. return self.__unicode__().join(iterable)
  190. @inheritdoc
  191. def ljust(self, width, fillchar=None):
  192. if fillchar is None:
  193. return self.__unicode__().ljust(width)
  194. return self.__unicode__().ljust(width, fillchar)
  195. @inheritdoc
  196. def lower(self):
  197. return self.__unicode__().lower()
  198. @inheritdoc
  199. def lstrip(self, chars=None):
  200. return self.__unicode__().lstrip(chars)
  201. if py3k:
  202. @staticmethod
  203. @inheritdoc
  204. def maketrans(self, x, y=None, z=None):
  205. if z is None:
  206. if y is None:
  207. return self.__unicode__.maketrans(x)
  208. return self.__unicode__.maketrans(x, y)
  209. return self.__unicode__.maketrans(x, y, z)
  210. @inheritdoc
  211. def partition(self, sep):
  212. return self.__unicode__().partition(sep)
  213. @inheritdoc
  214. def replace(self, old, new, count=None):
  215. if count is None:
  216. return self.__unicode__().replace(old, new)
  217. return self.__unicode__().replace(old, new, count)
  218. @inheritdoc
  219. def rfind(self, sub, start=None, end=None):
  220. return self.__unicode__().rfind(sub, start, end)
  221. @inheritdoc
  222. def rindex(self, sub, start=None, end=None):
  223. return self.__unicode__().rindex(sub, start, end)
  224. @inheritdoc
  225. def rjust(self, width, fillchar=None):
  226. if fillchar is None:
  227. return self.__unicode__().rjust(width)
  228. return self.__unicode__().rjust(width, fillchar)
  229. @inheritdoc
  230. def rpartition(self, sep):
  231. return self.__unicode__().rpartition(sep)
  232. if py3k:
  233. @inheritdoc
  234. def rsplit(self, sep=None, maxsplit=None):
  235. kwargs = {}
  236. if sep is not None:
  237. kwargs["sep"] = sep
  238. if maxsplit is not None:
  239. kwargs["maxsplit"] = maxsplit
  240. return self.__unicode__().rsplit(**kwargs)
  241. else:
  242. @inheritdoc
  243. def rsplit(self, sep=None, maxsplit=None):
  244. if maxsplit is None:
  245. if sep is None:
  246. return self.__unicode__().rsplit()
  247. return self.__unicode__().rsplit(sep)
  248. return self.__unicode__().rsplit(sep, maxsplit)
  249. @inheritdoc
  250. def rstrip(self, chars=None):
  251. return self.__unicode__().rstrip(chars)
  252. if py3k:
  253. @inheritdoc
  254. def split(self, sep=None, maxsplit=None):
  255. kwargs = {}
  256. if sep is not None:
  257. kwargs["sep"] = sep
  258. if maxsplit is not None:
  259. kwargs["maxsplit"] = maxsplit
  260. return self.__unicode__().split(**kwargs)
  261. else:
  262. @inheritdoc
  263. def split(self, sep=None, maxsplit=None):
  264. if maxsplit is None:
  265. if sep is None:
  266. return self.__unicode__().split()
  267. return self.__unicode__().split(sep)
  268. return self.__unicode__().split(sep, maxsplit)
  269. @inheritdoc
  270. def splitlines(self, keepends=None):
  271. if keepends is None:
  272. return self.__unicode__().splitlines()
  273. return self.__unicode__().splitlines(keepends)
  274. @inheritdoc
  275. def startswith(self, prefix, start=None, end=None):
  276. return self.__unicode__().startswith(prefix, start, end)
  277. @inheritdoc
  278. def strip(self, chars=None):
  279. return self.__unicode__().strip(chars)
  280. @inheritdoc
  281. def swapcase(self):
  282. return self.__unicode__().swapcase()
  283. @inheritdoc
  284. def title(self):
  285. return self.__unicode__().title()
  286. @inheritdoc
  287. def translate(self, table):
  288. return self.__unicode__().translate(table)
  289. @inheritdoc
  290. def upper(self):
  291. return self.__unicode__().upper()
  292. @inheritdoc
  293. def zfill(self, width):
  294. return self.__unicode__().zfill(width)
  295. del inheritdoc