A Python parser for MediaWiki wikicode https://mwparserfromhell.readthedocs.io/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

tag.py 7.3 KiB

11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241
  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2012-2013 Ben Kurtovic <ben.kurtovic@verizon.net>
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a copy
  6. # of this software and associated documentation files (the "Software"), to deal
  7. # in the Software without restriction, including without limitation the rights
  8. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. # copies of the Software, and to permit persons to whom the Software is
  10. # furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. # SOFTWARE.
  22. from __future__ import unicode_literals
  23. from . import Node, Text
  24. from ..compat import str
  25. from ..utils import parse_anything
  26. __all__ = ["Tag"]
  27. class Tag(Node):
  28. """Represents an HTML-style tag in wikicode, like ``<ref>``."""
  29. TAG_UNKNOWN = 0
  30. # Basic HTML:
  31. TAG_ITALIC = 1
  32. TAG_BOLD = 2
  33. TAG_UNDERLINE = 3
  34. TAG_STRIKETHROUGH = 4
  35. TAG_UNORDERED_LIST = 5
  36. TAG_ORDERED_LIST = 6
  37. TAG_DEF_TERM = 7
  38. TAG_DEF_ITEM = 8
  39. TAG_BLOCKQUOTE = 9
  40. TAG_RULE = 10
  41. TAG_BREAK = 11
  42. TAG_ABBR = 12
  43. TAG_PRE = 13
  44. TAG_MONOSPACE = 14
  45. TAG_CODE = 15
  46. TAG_SPAN = 16
  47. TAG_DIV = 17
  48. TAG_FONT = 18
  49. TAG_SMALL = 19
  50. TAG_BIG = 20
  51. TAG_CENTER = 21
  52. # MediaWiki parser hooks:
  53. TAG_REF = 101
  54. TAG_GALLERY = 102
  55. TAG_MATH = 103
  56. TAG_NOWIKI = 104
  57. TAG_NOINCLUDE = 105
  58. TAG_INCLUDEONLY = 106
  59. TAG_ONLYINCLUDE = 107
  60. # Additional parser hooks:
  61. TAG_SYNTAXHIGHLIGHT = 201
  62. TAG_POEM = 202
  63. # Lists of tags:
  64. TAGS_INVISIBLE = set((TAG_REF, TAG_GALLERY, TAG_MATH, TAG_NOINCLUDE))
  65. TAGS_VISIBLE = set(range(300)) - TAGS_INVISIBLE
  66. def __init__(self, type_, tag, contents=None, attrs=None, showtag=True,
  67. self_closing=False, open_padding=0, close_padding=0):
  68. super(Tag, self).__init__()
  69. self._type = type_
  70. self._tag = tag
  71. self._contents = contents
  72. if attrs:
  73. self._attrs = attrs
  74. else:
  75. self._attrs = []
  76. self._showtag = showtag
  77. self._self_closing = self_closing
  78. self._open_padding = open_padding
  79. self._close_padding = close_padding
  80. def __unicode__(self):
  81. if not self.showtag:
  82. open_, close = self._translate()
  83. if self.self_closing:
  84. return open_
  85. else:
  86. return open_ + str(self.contents) + close
  87. result = "<" + str(self.tag)
  88. if self.attrs:
  89. result += " " + " ".join([str(attr) for attr in self.attrs])
  90. if self.self_closing:
  91. result += " " * self.open_padding + "/>"
  92. else:
  93. result += " " * self.open_padding + ">" + str(self.contents)
  94. result += "</" + str(self.tag) + " " * self.close_padding + ">"
  95. return result
  96. def __iternodes__(self, getter):
  97. yield None, self
  98. if self.showtag:
  99. for child in getter(self.tag):
  100. yield self.tag, child
  101. for attr in self.attrs:
  102. for child in getter(attr.name):
  103. yield attr.name, child
  104. if attr.value:
  105. for child in getter(attr.value):
  106. yield attr.value, child
  107. for child in getter(self.contents):
  108. yield self.contents, child
  109. def __strip__(self, normalize, collapse):
  110. if self.type in self.TAGS_VISIBLE:
  111. return self.contents.strip_code(normalize, collapse)
  112. return None
  113. def __showtree__(self, write, get, mark):
  114. tagnodes = self.tag.nodes
  115. if (not self.attrs and len(tagnodes) == 1 and isinstance(tagnodes[0], Text)):
  116. write("<" + str(tagnodes[0]) + ">")
  117. else:
  118. write("<")
  119. get(self.tag)
  120. for attr in self.attrs:
  121. get(attr.name)
  122. if not attr.value:
  123. continue
  124. write(" = ")
  125. mark()
  126. get(attr.value)
  127. write(">")
  128. get(self.contents)
  129. if len(tagnodes) == 1 and isinstance(tagnodes[0], Text):
  130. write("</" + str(tagnodes[0]) + ">")
  131. else:
  132. write("</")
  133. get(self.tag)
  134. write(">")
  135. def _translate(self):
  136. """If the HTML-style tag has a wikicode representation, return that.
  137. For example, ``<b>Foo</b>`` can be represented as ``'''Foo'''``. This
  138. returns a tuple of the character starting the sequence and the
  139. character ending it.
  140. """
  141. translations = {
  142. self.TAG_ITALIC: ("''", "''"),
  143. self.TAG_BOLD: ("'''", "'''"),
  144. self.TAG_UNORDERED_LIST: ("*", ""),
  145. self.TAG_ORDERED_LIST: ("#", ""),
  146. self.TAG_DEF_TERM: (";", ""),
  147. self.TAG_DEF_ITEM: (":", ""),
  148. self.TAG_RULE: ("----", ""),
  149. }
  150. return translations[self.type]
  151. @property
  152. def type(self):
  153. """The tag type."""
  154. return self._type
  155. @property
  156. def tag(self):
  157. """The tag itself, as a :py:class:`~.Wikicode` object."""
  158. return self._tag
  159. @property
  160. def contents(self):
  161. """The contents of the tag, as a :py:class:`~.Wikicode` object."""
  162. return self._contents
  163. @property
  164. def attrs(self):
  165. """The list of attributes affecting the tag.
  166. Each attribute is an instance of :py:class:`~.Attribute`.
  167. """
  168. return self._attrs
  169. @property
  170. def showtag(self):
  171. """Whether to show the tag itself instead of a wikicode version."""
  172. return self._showtag
  173. @property
  174. def self_closing(self):
  175. """Whether the tag is self-closing with no content."""
  176. return self._self_closing
  177. @property
  178. def open_padding(self):
  179. """How much spacing to insert before the first closing >."""
  180. return self._open_padding
  181. @property
  182. def close_padding(self):
  183. """How much spacing to insert before the last closing >."""
  184. return self._close_padding
  185. @type.setter
  186. def type(self, value):
  187. value = int(value)
  188. if value not in self.TAGS_INVISIBLE | self.TAGS_VISIBLE:
  189. raise ValueError(value)
  190. self._type = value
  191. @tag.setter
  192. def tag(self, value):
  193. self._tag = parse_anything(value)
  194. @contents.setter
  195. def contents(self, value):
  196. self._contents = parse_anything(value)
  197. @showtag.setter
  198. def showtag(self, value):
  199. self._showtag = bool(value)
  200. @self_closing.setter
  201. def self_closing(self, value):
  202. self._self_closing = bool(value)
  203. @open_padding.setter
  204. def open_padding(self, value):
  205. self._open_padding = int(value)
  206. @close_padding.setter
  207. def close_padding(self, value):
  208. self._close_padding = int(value)