A Python parser for MediaWiki wikicode https://mwparserfromhell.readthedocs.io/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

309 lines
11 KiB

  1. #
  2. # Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com>
  3. #
  4. # Permission is hereby granted, free of charge, to any person obtaining a copy
  5. # of this software and associated documentation files (the "Software"), to deal
  6. # in the Software without restriction, including without limitation the rights
  7. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8. # copies of the Software, and to permit persons to whom the Software is
  9. # furnished to do so, subject to the following conditions:
  10. #
  11. # The above copyright notice and this permission notice shall be included in
  12. # all copies or substantial portions of the Software.
  13. #
  14. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  20. # SOFTWARE.
  21. from . import Node
  22. from .extras import Attribute
  23. from ..definitions import is_visible
  24. from ..utils import parse_anything
  25. __all__ = ["Tag"]
  26. class Tag(Node):
  27. """Represents an HTML-style tag in wikicode, like ``<ref>``."""
  28. def __init__(self, tag, contents=None, attrs=None, wiki_markup=None,
  29. self_closing=False, invalid=False, implicit=False, padding="",
  30. closing_tag=None, wiki_style_separator=None,
  31. closing_wiki_markup=None):
  32. super().__init__()
  33. self.tag = tag
  34. self.contents = contents
  35. self._attrs = attrs if attrs else []
  36. self._closing_wiki_markup = None
  37. self.wiki_markup = wiki_markup
  38. self.self_closing = self_closing
  39. self.invalid = invalid
  40. self.implicit = implicit
  41. self.padding = padding
  42. if closing_tag is not None:
  43. self.closing_tag = closing_tag
  44. self.wiki_style_separator = wiki_style_separator
  45. if closing_wiki_markup is not None:
  46. self.closing_wiki_markup = closing_wiki_markup
  47. def __unicode__(self):
  48. if self.wiki_markup:
  49. if self.attributes:
  50. attrs = "".join([str(attr) for attr in self.attributes])
  51. else:
  52. attrs = ""
  53. padding = self.padding or ""
  54. separator = self.wiki_style_separator or ""
  55. if self.self_closing:
  56. return self.wiki_markup + attrs + padding + separator
  57. else:
  58. close = self.closing_wiki_markup or ""
  59. return self.wiki_markup + attrs + padding + separator + \
  60. str(self.contents) + close
  61. result = ("</" if self.invalid else "<") + str(self.tag)
  62. if self.attributes:
  63. result += "".join([str(attr) for attr in self.attributes])
  64. if self.self_closing:
  65. result += self.padding + (">" if self.implicit else "/>")
  66. else:
  67. result += self.padding + ">" + str(self.contents)
  68. result += "</" + str(self.closing_tag) + ">"
  69. return result
  70. def __children__(self):
  71. if not self.wiki_markup:
  72. yield self.tag
  73. for attr in self.attributes:
  74. yield attr.name
  75. if attr.value is not None:
  76. yield attr.value
  77. if not self.self_closing:
  78. yield self.contents
  79. if not self.wiki_markup and self.closing_tag:
  80. yield self.closing_tag
  81. def __strip__(self, **kwargs):
  82. if self.contents and is_visible(self.tag):
  83. return self.contents.strip_code(**kwargs)
  84. return None
  85. def __showtree__(self, write, get, mark):
  86. write("</" if self.invalid else "<")
  87. get(self.tag)
  88. for attr in self.attributes:
  89. get(attr.name)
  90. if not attr.value:
  91. continue
  92. write(" = ")
  93. mark()
  94. get(attr.value)
  95. if self.self_closing:
  96. write(">" if self.implicit else "/>")
  97. else:
  98. write(">")
  99. get(self.contents)
  100. write("</")
  101. get(self.closing_tag)
  102. write(">")
  103. @property
  104. def tag(self):
  105. """The tag itself, as a :class:`.Wikicode` object."""
  106. return self._tag
  107. @property
  108. def contents(self):
  109. """The contents of the tag, as a :class:`.Wikicode` object."""
  110. return self._contents
  111. @property
  112. def attributes(self):
  113. """The list of attributes affecting the tag.
  114. Each attribute is an instance of :class:`.Attribute`.
  115. """
  116. return self._attrs
  117. @property
  118. def wiki_markup(self):
  119. """The wikified version of a tag to show instead of HTML.
  120. If set to a value, this will be displayed instead of the brackets.
  121. For example, set to ``''`` to replace ``<i>`` or ``----`` to replace
  122. ``<hr>``.
  123. """
  124. return self._wiki_markup
  125. @property
  126. def self_closing(self):
  127. """Whether the tag is self-closing with no content (like ``<br/>``)."""
  128. return self._self_closing
  129. @property
  130. def invalid(self):
  131. """Whether the tag starts with a backslash after the opening bracket.
  132. This makes the tag look like a lone close tag. It is technically
  133. invalid and is only parsable Wikicode when the tag itself is
  134. single-only, like ``<br>`` and ``<img>``. See
  135. :func:`.definitions.is_single_only`.
  136. """
  137. return self._invalid
  138. @property
  139. def implicit(self):
  140. """Whether the tag is implicitly self-closing, with no ending slash.
  141. This is only possible for specific "single" tags like ``<br>`` and
  142. ``<li>``. See :func:`.definitions.is_single`. This field only has an
  143. effect if :attr:`self_closing` is also ``True``.
  144. """
  145. return self._implicit
  146. @property
  147. def padding(self):
  148. """Spacing to insert before the first closing ``>``."""
  149. return self._padding
  150. @property
  151. def closing_tag(self):
  152. """The closing tag, as a :class:`.Wikicode` object.
  153. This will usually equal :attr:`tag`, unless there is additional
  154. spacing, comments, or the like.
  155. """
  156. return self._closing_tag
  157. @property
  158. def wiki_style_separator(self):
  159. """The separator between the padding and content in a wiki markup tag.
  160. Essentially the wiki equivalent of the TagCloseOpen.
  161. """
  162. return self._wiki_style_separator
  163. @property
  164. def closing_wiki_markup(self):
  165. """The wikified version of the closing tag to show instead of HTML.
  166. If set to a value, this will be displayed instead of the close tag
  167. brackets. If tag is :attr:`self_closing` is ``True`` then this is not
  168. displayed. If :attr:`wiki_markup` is set and this has not been set, this
  169. is set to the value of :attr:`wiki_markup`. If this has been set and
  170. :attr:`wiki_markup` is set to a ``False`` value, this is set to
  171. ``None``.
  172. """
  173. return self._closing_wiki_markup
  174. @tag.setter
  175. def tag(self, value):
  176. self._tag = self._closing_tag = parse_anything(value)
  177. @contents.setter
  178. def contents(self, value):
  179. self._contents = parse_anything(value)
  180. @wiki_markup.setter
  181. def wiki_markup(self, value):
  182. self._wiki_markup = str(value) if value else None
  183. if not value or not self.closing_wiki_markup:
  184. self._closing_wiki_markup = self._wiki_markup
  185. @self_closing.setter
  186. def self_closing(self, value):
  187. self._self_closing = bool(value)
  188. @invalid.setter
  189. def invalid(self, value):
  190. self._invalid = bool(value)
  191. @implicit.setter
  192. def implicit(self, value):
  193. self._implicit = bool(value)
  194. @padding.setter
  195. def padding(self, value):
  196. if not value:
  197. self._padding = ""
  198. else:
  199. value = str(value)
  200. if not value.isspace():
  201. raise ValueError("padding must be entirely whitespace")
  202. self._padding = value
  203. @closing_tag.setter
  204. def closing_tag(self, value):
  205. self._closing_tag = parse_anything(value)
  206. @wiki_style_separator.setter
  207. def wiki_style_separator(self, value):
  208. self._wiki_style_separator = str(value) if value else None
  209. @closing_wiki_markup.setter
  210. def closing_wiki_markup(self, value):
  211. self._closing_wiki_markup = str(value) if value else None
  212. def has(self, name):
  213. """Return whether any attribute in the tag has the given *name*.
  214. Note that a tag may have multiple attributes with the same name, but
  215. only the last one is read by the MediaWiki parser.
  216. """
  217. for attr in self.attributes:
  218. if attr.name == name.strip():
  219. return True
  220. return False
  221. def get(self, name):
  222. """Get the attribute with the given *name*.
  223. The returned object is a :class:`.Attribute` instance. Raises
  224. :exc:`ValueError` if no attribute has this name. Since multiple
  225. attributes can have the same name, we'll return the last match, since
  226. all but the last are ignored by the MediaWiki parser.
  227. """
  228. for attr in reversed(self.attributes):
  229. if attr.name == name.strip():
  230. return attr
  231. raise ValueError(name)
  232. def add(self, name, value=None, quotes='"', pad_first=" ",
  233. pad_before_eq="", pad_after_eq=""):
  234. """Add an attribute with the given *name* and *value*.
  235. *name* and *value* can be anything parsable by
  236. :func:`.utils.parse_anything`; *value* can be omitted if the attribute
  237. is valueless. If *quotes* is not ``None``, it should be a string
  238. (either ``"`` or ``'``) that *value* will be wrapped in (this is
  239. recommended). ``None`` is only legal if *value* contains no spacing.
  240. *pad_first*, *pad_before_eq*, and *pad_after_eq* are whitespace used as
  241. padding before the name, before the equal sign (or after the name if no
  242. value), and after the equal sign (ignored if no value), respectively.
  243. """
  244. if value is not None:
  245. value = parse_anything(value)
  246. quotes = Attribute.coerce_quotes(quotes)
  247. attr = Attribute(parse_anything(name), value, quotes)
  248. attr.pad_first = pad_first
  249. attr.pad_before_eq = pad_before_eq
  250. attr.pad_after_eq = pad_after_eq
  251. self.attributes.append(attr)
  252. return attr
  253. def remove(self, name):
  254. """Remove all attributes with the given *name*.
  255. Raises :exc:`ValueError` if none were found.
  256. """
  257. attrs = [attr for attr in self.attributes if attr.name == name.strip()]
  258. if not attrs:
  259. raise ValueError(name)
  260. for attr in attrs:
  261. self.attributes.remove(attr)