A Python parser for MediaWiki wikicode https://mwparserfromhell.readthedocs.io/
Não pode escolher mais do que 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

há 11 anos
há 11 anos
há 11 anos
há 11 anos
há 11 anos
há 11 anos
há 11 anos
há 11 anos
há 11 anos
há 11 anos
há 11 anos
há 11 anos
há 11 anos
há 11 anos
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a copy
  6. # of this software and associated documentation files (the "Software"), to deal
  7. # in the Software without restriction, including without limitation the rights
  8. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. # copies of the Software, and to permit persons to whom the Software is
  10. # furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. # SOFTWARE.
  22. from __future__ import unicode_literals
  23. from . import Node, Text
  24. from ..compat import str
  25. __all__ = ["Tag"]
  26. class Tag(Node):
  27. """Represents an HTML-style tag in wikicode, like ``<ref>``."""
  28. TAG_UNKNOWN = 0
  29. # Basic HTML:
  30. TAG_ITALIC = 1
  31. TAG_BOLD = 2
  32. TAG_UNDERLINE = 3
  33. TAG_STRIKETHROUGH = 4
  34. TAG_UNORDERED_LIST = 5
  35. TAG_ORDERED_LIST = 6
  36. TAG_DEF_TERM = 7
  37. TAG_DEF_ITEM = 8
  38. TAG_BLOCKQUOTE = 9
  39. TAG_RULE = 10
  40. TAG_BREAK = 11
  41. TAG_ABBR = 12
  42. TAG_PRE = 13
  43. TAG_MONOSPACE = 14
  44. TAG_CODE = 15
  45. TAG_SPAN = 16
  46. TAG_DIV = 17
  47. TAG_FONT = 18
  48. TAG_SMALL = 19
  49. TAG_BIG = 20
  50. TAG_CENTER = 21
  51. # MediaWiki parser hooks:
  52. TAG_REF = 101
  53. TAG_GALLERY = 102
  54. TAG_MATH = 103
  55. TAG_NOWIKI = 104
  56. TAG_NOINCLUDE = 105
  57. TAG_INCLUDEONLY = 106
  58. TAG_ONLYINCLUDE = 107
  59. # Additional parser hooks:
  60. TAG_SYNTAXHIGHLIGHT = 201
  61. TAG_POEM = 202
  62. # Lists of tags:
  63. TAGS_INVISIBLE = set((TAG_REF, TAG_GALLERY, TAG_MATH, TAG_NOINCLUDE))
  64. TAGS_VISIBLE = set(range(300)) - TAGS_INVISIBLE
  65. def __init__(self, type_, tag, contents=None, attrs=None, showtag=True,
  66. self_closing=False, open_padding=0, close_padding=0):
  67. super(Tag, self).__init__()
  68. self._type = type_
  69. self._tag = tag
  70. self._contents = contents
  71. if attrs:
  72. self._attrs = attrs
  73. else:
  74. self._attrs = []
  75. self._showtag = showtag
  76. self._self_closing = self_closing
  77. self._open_padding = open_padding
  78. self._close_padding = close_padding
  79. def __unicode__(self):
  80. if not self.showtag:
  81. open_, close = self._translate()
  82. if self.self_closing:
  83. return open_
  84. else:
  85. return open_ + str(self.contents) + close
  86. result = "<" + str(self.tag)
  87. if self.attrs:
  88. result += " " + " ".join([str(attr) for attr in self.attrs])
  89. if self.self_closing:
  90. result += " " * self.open_padding + "/>"
  91. else:
  92. result += " " * self.open_padding + ">" + str(self.contents)
  93. result += "</" + str(self.tag) + " " * self.close_padding + ">"
  94. return result
  95. def __iternodes__(self, getter):
  96. yield None, self
  97. if self.showtag:
  98. for child in getter(self.tag):
  99. yield self.tag, child
  100. for attr in self.attrs:
  101. for child in getter(attr.name):
  102. yield attr.name, child
  103. if attr.value:
  104. for child in getter(attr.value):
  105. yield attr.value, child
  106. for child in getter(self.contents):
  107. yield self.contents, child
  108. def __strip__(self, normalize, collapse):
  109. if self.type in self.TAGS_VISIBLE:
  110. return self.contents.strip_code(normalize, collapse)
  111. return None
  112. def __showtree__(self, write, get, mark):
  113. tagnodes = self.tag.nodes
  114. if (not self.attrs and len(tagnodes) == 1 and isinstance(tagnodes[0], Text)):
  115. write("<" + str(tagnodes[0]) + ">")
  116. else:
  117. write("<")
  118. get(self.tag)
  119. for attr in self.attrs:
  120. get(attr.name)
  121. if not attr.value:
  122. continue
  123. write(" = ")
  124. mark()
  125. get(attr.value)
  126. write(">")
  127. get(self.contents)
  128. if len(tagnodes) == 1 and isinstance(tagnodes[0], Text):
  129. write("</" + str(tagnodes[0]) + ">")
  130. else:
  131. write("</")
  132. get(self.tag)
  133. write(">")
  134. def _translate(self):
  135. """If the HTML-style tag has a wikicode representation, return that.
  136. For example, ``<b>Foo</b>`` can be represented as ``'''Foo'''``. This
  137. returns a tuple of the character starting the sequence and the
  138. character ending it.
  139. """
  140. translations = {
  141. self.TAG_ITALIC: ("''", "''"),
  142. self.TAG_BOLD: ("'''", "'''"),
  143. self.TAG_UNORDERED_LIST: ("*", ""),
  144. self.TAG_ORDERED_LIST: ("#", ""),
  145. self.TAG_DEF_TERM: (";", ""),
  146. self.TAG_DEF_ITEM: (":", ""),
  147. self.TAG_RULE: ("----", ""),
  148. }
  149. return translations[self.type]
  150. @property
  151. def type(self):
  152. """The tag type."""
  153. return self._type
  154. @property
  155. def tag(self):
  156. """The tag itself, as a ``Wikicode`` object."""
  157. return self._tag
  158. @property
  159. def contents(self):
  160. """The contents of the tag, as a ``Wikicode`` object."""
  161. return self._contents
  162. @property
  163. def attrs(self):
  164. """The list of attributes affecting the tag.
  165. Each attribute is an instance of
  166. :py:class:`~mwparserfromhell.nodes.extras.attribute.Attribute`.
  167. """
  168. return self._attrs
  169. @property
  170. def showtag(self):
  171. """Whether to show the tag itself instead of a wikicode version."""
  172. return self._showtag
  173. @property
  174. def self_closing(self):
  175. """Whether the tag is self-closing with no content."""
  176. return self._self_closing
  177. @property
  178. def open_padding(self):
  179. """How much spacing to insert before the first closing >."""
  180. return self._open_padding
  181. @property
  182. def close_padding(self):
  183. """How much spacing to insert before the last closing >."""
  184. return self._close_padding