A Python parser for MediaWiki wikicode https://mwparserfromhell.readthedocs.io/
25'ten fazla konu seçemezsiniz Konular bir harf veya rakamla başlamalı, kısa çizgiler ('-') içerebilir ve en fazla 35 karakter uzunluğunda olabilir.
 
 
 
 

182 satır
7.1 KiB

  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a copy
  6. # of this software and associated documentation files (the "Software"), to deal
  7. # in the Software without restriction, including without limitation the rights
  8. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. # copies of the Software, and to permit persons to whom the Software is
  10. # furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. # SOFTWARE.
  22. import re
  23. from . import tokens
  24. from ..nodes import Heading, HTMLEntity, Tag, Template, Text
  25. from ..nodes.extras import Attribute, Parameter
  26. from ..smart_list import SmartList
  27. from ..wikicode import Wikicode
  28. __all__ = ["Builder"]
  29. class Builder(object):
  30. def __init__(self):
  31. self._tokens = []
  32. self._stacks = []
  33. def _wrap(self, nodes):
  34. return Wikicode(SmartList(nodes))
  35. def _push(self):
  36. self._stacks.append([])
  37. def _pop(self, wrap=True):
  38. if wrap:
  39. return self._wrap(self._stacks.pop())
  40. return self._stacks.pop()
  41. def _write(self, item):
  42. self._stacks[-1].append(item)
  43. def _handle_parameter(self, key):
  44. showkey = False
  45. self._push()
  46. while self._tokens:
  47. token = self._tokens.pop(0)
  48. if isinstance(token, tokens.TemplateParamEquals):
  49. key = self._pop()
  50. showkey = True
  51. self._push()
  52. elif isinstance(token, (tokens.TemplateParamSeparator,
  53. tokens.TemplateClose)):
  54. self._tokens.insert(0, token)
  55. value = self._pop()
  56. return Parameter(key, value, showkey)
  57. else:
  58. self._write(self._handle_token(token))
  59. def _handle_template(self):
  60. params = []
  61. int_keys = set()
  62. int_key_range = {1}
  63. self._push()
  64. while self._tokens:
  65. token = self._tokens.pop(0)
  66. if isinstance(token, tokens.TemplateParamSeparator):
  67. if not params:
  68. name = self._pop()
  69. default = unicode(min(int_key_range - int_keys))
  70. param = self._handle_parameter(self._wrap([Text(default)]))
  71. if re.match(r"[1-9][0-9]*$", param.name.strip()):
  72. # We try a more restrictive test for integers than
  73. # try: int(), because "01" as a key will pass through int()
  74. # correctly but is not a valid integer key in wikicode:
  75. int_keys.add(int(unicode(param.name)))
  76. int_key_range.add(len(int_keys) + 1)
  77. params.append(param)
  78. elif isinstance(token, tokens.TemplateClose):
  79. if not params:
  80. name = self._pop()
  81. return Template(name, params)
  82. else:
  83. self._write(self._handle_token(token))
  84. def _handle_entity(self):
  85. token = self._tokens.pop(0)
  86. if isinstance(token, tokens.HTMLEntityNumeric):
  87. token = self._tokens.pop(0)
  88. if isinstance(token, tokens.HTMLEntityHex):
  89. text = self._tokens.pop(0)
  90. self._tokens.pop(0) # Remove HTMLEntityEnd
  91. return HTMLEntity(text.text, named=False, hexadecimal=True,
  92. hex_char=token.char)
  93. self._tokens.pop(0) # Remove HTMLEntityEnd
  94. return HTMLEntity(token.text, named=False, hexadecimal=False)
  95. self._tokens.pop(0) # Remove HTMLEntityEnd
  96. return HTMLEntity(token.text, named=True, hexadecimal=False)
  97. def _handle_heading(self, token):
  98. level = token.level
  99. self._push()
  100. while self._tokens:
  101. token = self._tokens.pop(0)
  102. if isinstance(token, tokens.HeadingBlock):
  103. title = self._pop()
  104. return Heading(title, level)
  105. else:
  106. self._write(self._handle_token(token))
  107. def _handle_attribute(self):
  108. name, quoted = None, False
  109. self._push()
  110. while self._tokens:
  111. token = self._tokens.pop(0)
  112. if isinstance(token, tokens.TagAttrEquals):
  113. name = self._pop()
  114. self._push()
  115. elif isinstance(token, tokens.TagAttrQuote):
  116. quoted = True
  117. elif isinstance(token, (tokens.TagAttrStart,
  118. tokens.TagCloseOpen)):
  119. self._tokens.insert(0, token)
  120. if name is not None:
  121. return Attribute(name, self._pop(), quoted)
  122. return Attribute(self._pop(), quoted=quoted)
  123. else:
  124. self._write(self._handle_token(token))
  125. def _handle_tag(self, token):
  126. type_, showtag = token.type, token.showtag
  127. attrs = []
  128. self._push()
  129. while self._tokens:
  130. token = self._tokens.pop(0)
  131. if isinstance(token, tokens.TagAttrStart):
  132. attrs.append(self._handle_attribute())
  133. elif isinstance(token, tokens.TagCloseOpen):
  134. open_pad = token.padding
  135. tag = self._pop()
  136. self._push()
  137. elif isinstance(token, tokens.TagCloseSelfclose):
  138. tag = self._pop()
  139. return Tag(type_, tag, attrs=attrs, showtag=showtag,
  140. self_closing=True, open_padding=token.padding)
  141. elif isinstance(token, tokens.TagOpenClose):
  142. contents = self._pop()
  143. elif isinstance(token, tokens.TagCloseClose):
  144. return Tag(type_, tag, contents, attrs, showtag, False,
  145. open_pad, token.padding)
  146. else:
  147. self._write(self._handle_token(token))
  148. def _handle_token(self, token):
  149. if isinstance(token, tokens.Text):
  150. return Text(token.text)
  151. elif isinstance(token, tokens.TemplateOpen):
  152. return self._handle_template()
  153. elif isinstance(token, tokens.HTMLEntityStart):
  154. return self._handle_entity()
  155. elif isinstance(token, tokens.HeadingBlock):
  156. return self._handle_heading(token)
  157. elif isinstance(token, tokens.TagOpenOpen):
  158. return self._handle_tag(token)
  159. def build(self, tokenlist):
  160. self._tokens = tokenlist
  161. self._push()
  162. while self._tokens:
  163. node = self._handle_token(self._tokens.pop(0))
  164. self._write(node)
  165. return self._pop()