A Python parser for MediaWiki wikicode https://mwparserfromhell.readthedocs.io/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

266 lines
7.4 KiB

  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a copy
  6. # of this software and associated documentation files (the "Software"), to deal
  7. # in the Software without restriction, including without limitation the rights
  8. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. # copies of the Software, and to permit persons to whom the Software is
  10. # furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. # SOFTWARE.
  22. from __future__ import unicode_literals
  23. from .compat import py3k, str
  24. __all__ = ["StringMixIn"]
  25. def inheritdoc(method):
  26. method.__doc__ = getattr(str, method.__name__).__doc__
  27. return method
  28. class StringMixIn(object):
  29. if py3k:
  30. def __str__(self):
  31. return self.__unicode__()
  32. def __bytes__(self):
  33. return self.__unicode__().encode("utf8")
  34. else:
  35. def __str__(self):
  36. return self.__unicode__().encode("utf8")
  37. def __unicode__(self):
  38. raise NotImplementedError()
  39. def __repr__(self):
  40. return repr(self.__unicode__())
  41. def __lt__(self, other):
  42. if isinstance(other, StringMixIn):
  43. return self.__unicode__() < other.__unicode__()
  44. return self.__unicode__() < other
  45. def __le__(self, other):
  46. if isinstance(other, StringMixIn):
  47. return self.__unicode__() <= other.__unicode__()
  48. return self.__unicode__() <= other
  49. def __eq__(self, other):
  50. if isinstance(other, StringMixIn):
  51. return self.__unicode__() == other.__unicode__()
  52. return self.__unicode__() == other
  53. def __ne__(self, other):
  54. if isinstance(other, StringMixIn):
  55. return self.__unicode__() != other.__unicode__()
  56. return self.__unicode__() != other
  57. def __gt__(self, other):
  58. if isinstance(other, StringMixIn):
  59. return self.__unicode__() > other.__unicode__()
  60. return self.__unicode__() > other
  61. def __ge__(self, other):
  62. if isinstance(other, StringMixIn):
  63. return self.__unicode__() >= other.__unicode__()
  64. return self.__unicode__() >= other
  65. if py3k:
  66. def __bool__(self):
  67. return bool(self.__unicode__())
  68. else:
  69. def __nonzero__(self):
  70. return bool(self.__unicode__())
  71. def __len__(self):
  72. return len(self.__unicode__())
  73. def __iter__(self):
  74. for char in self.__unicode__():
  75. yield char
  76. def __getitem__(self, key):
  77. return self.__unicode__()[key]
  78. def __contains__(self, item):
  79. if isinstance(item, StringMixIn):
  80. return str(item) in self.__unicode__()
  81. return item in self.__unicode__()
  82. @inheritdoc
  83. def capitalize(self):
  84. return self.__unicode__().capitalize()
  85. @inheritdoc
  86. def center(self, width, fillchar=None):
  87. return self.__unicode__().center(width, fillchar)
  88. @inheritdoc
  89. def count(self, sub=None, start=None, end=None):
  90. return self.__unicode__().count(sub, start, end)
  91. if not py3k:
  92. @inheritdoc
  93. def decode(self, encoding=None, errors=None):
  94. return self.__unicode__().decode(encoding, errors)
  95. @inheritdoc
  96. def encode(self, encoding=None, errors=None):
  97. return self.__unicode__().encode(encoding, errors)
  98. @inheritdoc
  99. def endswith(self, prefix, start=None, end=None):
  100. return self.__unicode__().endswith(prefix, start, end)
  101. @inheritdoc
  102. def expandtabs(self, tabsize=None):
  103. return self.__unicode__().expandtabs(tabsize)
  104. @inheritdoc
  105. def find(self, sub=None, start=None, end=None):
  106. return self.__unicode__().find(sub, start, end)
  107. @inheritdoc
  108. def format(self, *args, **kwargs):
  109. return self.__unicode__().format(*args, **kwargs)
  110. @inheritdoc
  111. def index(self, sub=None, start=None, end=None):
  112. return self.__unicode__().index(sub, start, end)
  113. @inheritdoc
  114. def isalnum(self):
  115. return self.__unicode__().isalnum()
  116. @inheritdoc
  117. def isalpha(self):
  118. return self.__unicode__().isalpha()
  119. @inheritdoc
  120. def isdecimal(self):
  121. return self.__unicode__().isdecimal()
  122. @inheritdoc
  123. def isdigit(self):
  124. return self.__unicode__().isdigit()
  125. @inheritdoc
  126. def islower(self):
  127. return self.__unicode__().islower()
  128. @inheritdoc
  129. def isnumeric(self):
  130. return self.__unicode__().isnumeric()
  131. @inheritdoc
  132. def isspace(self):
  133. return self.__unicode__().isspace()
  134. @inheritdoc
  135. def istitle(self):
  136. return self.__unicode__().istitle()
  137. @inheritdoc
  138. def isupper(self):
  139. return self.__unicode__().isupper()
  140. @inheritdoc
  141. def join(self, iterable):
  142. return self.__unicode__().join(iterable)
  143. @inheritdoc
  144. def ljust(self, width, fillchar=None):
  145. return self.__unicode__().ljust(width, fillchar)
  146. @inheritdoc
  147. def lower(self):
  148. return self.__unicode__().lower()
  149. @inheritdoc
  150. def lstrip(self, chars=None):
  151. return self.__unicode__().lstrip(chars)
  152. @inheritdoc
  153. def partition(self, sep):
  154. return self.__unicode__().partition(sep)
  155. @inheritdoc
  156. def replace(self, old, new, count):
  157. return self.__unicode__().replace(old, new, count)
  158. @inheritdoc
  159. def rfind(self, sub=None, start=None, end=None):
  160. return self.__unicode__().rfind(sub, start, end)
  161. @inheritdoc
  162. def rindex(self, sub=None, start=None, end=None):
  163. return self.__unicode__().rindex(sub, start, end)
  164. @inheritdoc
  165. def rjust(self, width, fillchar=None):
  166. return self.__unicode__().rjust(width, fillchar)
  167. @inheritdoc
  168. def rpartition(self, sep):
  169. return self.__unicode__().rpartition(sep)
  170. @inheritdoc
  171. def rsplit(self, sep=None, maxsplit=None):
  172. return self.__unicode__().rsplit(sep, maxsplit)
  173. @inheritdoc
  174. def rstrip(self, chars=None):
  175. return self.__unicode__().rstrip(chars)
  176. @inheritdoc
  177. def split(self, sep=None, maxsplit=None):
  178. return self.__unicode__().split(sep, maxsplit)
  179. @inheritdoc
  180. def splitlines(self, keepends=None):
  181. return self.__unicode__().splitlines(keepends)
  182. @inheritdoc
  183. def startswith(self, prefix, start=None, end=None):
  184. return self.__unicode__().startswith(prefix, start, end)
  185. @inheritdoc
  186. def strip(self, chars=None):
  187. return self.__unicode__().strip(chars)
  188. @inheritdoc
  189. def swapcase(self):
  190. return self.__unicode__().swapcase()
  191. @inheritdoc
  192. def title(self):
  193. return self.__unicode__().title()
  194. @inheritdoc
  195. def translate(self, table, deletechars=None):
  196. return self.__unicode__().translate(table, deletechars)
  197. @inheritdoc
  198. def upper(self):
  199. return self.__unicode__().upper()
  200. @inheritdoc
  201. def zfill(self, width):
  202. return self.__unicode__().zfill(width)
# The decorator is only used while the class body is being built, so it is
# removed from the module namespace once the class exists.
del inheritdoc