A Python parser for MediaWiki wikicode https://mwparserfromhell.readthedocs.io/
25'ten fazla konu seçemezsiniz Konular bir harf veya rakamla başlamalı, kısa çizgiler ('-') içerebilir ve en fazla 35 karakter uzunluğunda olabilir.
 
 
 
 

237 satır
9.2 KiB

  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a copy
  6. # of this software and associated documentation files (the "Software"), to deal
  7. # in the Software without restriction, including without limitation the rights
  8. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. # copies of the Software, and to permit persons to whom the Software is
  10. # furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. # SOFTWARE.
  22. from __future__ import unicode_literals
  23. from . import tokens
  24. from ..compat import str
  25. from ..nodes import Argument, Comment, Heading, HTMLEntity, Tag, Template, Text
  26. from ..nodes.extras import Attribute, Parameter
  27. from ..smart_list import SmartList
  28. from ..wikicode import Wikicode
  29. __all__ = ["Builder"]
  30. class Builder(object):
  31. """Combines a sequence of tokens into a tree of ``Wikicode`` objects.
  32. To use, pass a list of :py:class:`~.Token`\ s to the :py:meth:`build`
  33. method. The list will be exhausted as it is parsed and a
  34. :py:class:`~.Wikicode` object will be returned.
  35. """
  36. def __init__(self):
  37. self._tokens = []
  38. self._stacks = []
  39. def _wrap(self, nodes):
  40. """Properly wrap a list of nodes in a ``Wikicode`` object."""
  41. return Wikicode(SmartList(nodes))
  42. def _push(self):
  43. """Push a new node list onto the stack."""
  44. self._stacks.append([])
  45. def _pop(self, wrap=True):
  46. """Pop the current node list off of the stack.
  47. If *wrap* is ``True``, we will call :py:meth:`_wrap` on the list.
  48. """
  49. if wrap:
  50. return self._wrap(self._stacks.pop())
  51. return self._stacks.pop()
  52. def _write(self, item):
  53. """Append a node to the current node list."""
  54. self._stacks[-1].append(item)
  55. def _handle_parameter(self, default):
  56. """Handle a case where a parameter is at the head of the tokens.
  57. *default* is the value to use if no parameter name is defined.
  58. """
  59. key = None
  60. showkey = False
  61. self._push()
  62. while self._tokens:
  63. token = self._tokens.pop()
  64. if isinstance(token, tokens.TemplateParamEquals):
  65. key = self._pop()
  66. showkey = True
  67. self._push()
  68. elif isinstance(token, (tokens.TemplateParamSeparator,
  69. tokens.TemplateClose)):
  70. self._tokens.append(token)
  71. value = self._pop()
  72. if not key:
  73. key = self._wrap([Text(str(default))])
  74. return Parameter(key, value, showkey)
  75. else:
  76. self._write(self._handle_token(token))
  77. def _handle_template(self):
  78. """Handle a case where a template is at the head of the tokens."""
  79. params = []
  80. default = 1
  81. self._push()
  82. while self._tokens:
  83. token = self._tokens.pop()
  84. if isinstance(token, tokens.TemplateParamSeparator):
  85. if not params:
  86. name = self._pop()
  87. param = self._handle_parameter(default)
  88. params.append(param)
  89. if not param.showkey:
  90. default += 1
  91. elif isinstance(token, tokens.TemplateClose):
  92. if not params:
  93. name = self._pop()
  94. return Template(name, params)
  95. else:
  96. self._write(self._handle_token(token))
  97. def _handle_argument(self):
  98. """Handle a case where an argument is at the head of the tokens."""
  99. name = None
  100. self._push()
  101. while self._tokens:
  102. token = self._tokens.pop()
  103. if isinstance(token, tokens.ArgumentSeparator):
  104. name = self._pop()
  105. self._push()
  106. elif isinstance(token, tokens.ArgumentClose):
  107. if name is not None:
  108. return Argument(name, self._pop())
  109. return Argument(self._pop())
  110. else:
  111. self._write(self._handle_token(token))
  112. def _handle_entity(self):
  113. """Handle a case where a HTML entity is at the head of the tokens."""
  114. token = self._tokens.pop()
  115. if isinstance(token, tokens.HTMLEntityNumeric):
  116. token = self._tokens.pop()
  117. if isinstance(token, tokens.HTMLEntityHex):
  118. text = self._tokens.pop()
  119. self._tokens.pop() # Remove HTMLEntityEnd
  120. return HTMLEntity(text.text, named=False, hexadecimal=True,
  121. hex_char=token.char)
  122. self._tokens.pop() # Remove HTMLEntityEnd
  123. return HTMLEntity(token.text, named=False, hexadecimal=False)
  124. self._tokens.pop() # Remove HTMLEntityEnd
  125. return HTMLEntity(token.text, named=True, hexadecimal=False)
  126. def _handle_heading(self, token):
  127. """Handle a case where a heading is at the head of the tokens."""
  128. level = token.level
  129. self._push()
  130. while self._tokens:
  131. token = self._tokens.pop()
  132. if isinstance(token, tokens.HeadingEnd):
  133. title = self._pop()
  134. return Heading(title, level)
  135. else:
  136. self._write(self._handle_token(token))
  137. def _handle_comment(self):
  138. """Handle a case where a hidden comment is at the head of the tokens."""
  139. self._push()
  140. while self._tokens:
  141. token = self._tokens.pop()
  142. if isinstance(token, tokens.CommentEnd):
  143. contents = self._pop()
  144. return Comment(contents)
  145. else:
  146. self._write(self._handle_token(token))
  147. def _handle_attribute(self):
  148. """Handle a case where a tag attribute is at the head of the tokens."""
  149. name, quoted = None, False
  150. self._push()
  151. while self._tokens:
  152. token = self._tokens.pop()
  153. if isinstance(token, tokens.TagAttrEquals):
  154. name = self._pop()
  155. self._push()
  156. elif isinstance(token, tokens.TagAttrQuote):
  157. quoted = True
  158. elif isinstance(token, (tokens.TagAttrStart,
  159. tokens.TagCloseOpen)):
  160. self._tokens.append(token)
  161. if name is not None:
  162. return Attribute(name, self._pop(), quoted)
  163. return Attribute(self._pop(), quoted=quoted)
  164. else:
  165. self._write(self._handle_token(token))
  166. def _handle_tag(self, token):
  167. """Handle a case where a tag is at the head of the tokens."""
  168. type_, showtag = token.type, token.showtag
  169. attrs = []
  170. self._push()
  171. while self._tokens:
  172. token = self._tokens.pop()
  173. if isinstance(token, tokens.TagAttrStart):
  174. attrs.append(self._handle_attribute())
  175. elif isinstance(token, tokens.TagCloseOpen):
  176. open_pad = token.padding
  177. tag = self._pop()
  178. self._push()
  179. elif isinstance(token, tokens.TagCloseSelfclose):
  180. tag = self._pop()
  181. return Tag(type_, tag, attrs=attrs, showtag=showtag,
  182. self_closing=True, open_padding=token.padding)
  183. elif isinstance(token, tokens.TagOpenClose):
  184. contents = self._pop()
  185. elif isinstance(token, tokens.TagCloseClose):
  186. return Tag(type_, tag, contents, attrs, showtag, False,
  187. open_pad, token.padding)
  188. else:
  189. self._write(self._handle_token(token))
  190. def _handle_token(self, token):
  191. """Handle a single token."""
  192. if isinstance(token, tokens.Text):
  193. return Text(token.text)
  194. elif isinstance(token, tokens.TemplateOpen):
  195. return self._handle_template()
  196. elif isinstance(token, tokens.ArgumentOpen):
  197. return self._handle_argument()
  198. elif isinstance(token, tokens.HTMLEntityStart):
  199. return self._handle_entity()
  200. elif isinstance(token, tokens.HeadingStart):
  201. return self._handle_heading(token)
  202. elif isinstance(token, tokens.CommentStart):
  203. return self._handle_comment()
  204. elif isinstance(token, tokens.TagOpenOpen):
  205. return self._handle_tag(token)
  206. def build(self, tokenlist):
  207. """Build a Wikicode object from a list tokens and return it."""
  208. self._tokens = tokenlist
  209. self._tokens.reverse()
  210. self._push()
  211. while self._tokens:
  212. node = self._handle_token(self._tokens.pop())
  213. self._write(node)
  214. return self._pop()