A Python parser for MediaWiki wikicode https://mwparserfromhell.readthedocs.io/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

181 lines
6.8 KiB

  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a copy
  6. # of this software and associated documentation files (the "Software"), to deal
  7. # in the Software without restriction, including without limitation the rights
  8. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. # copies of the Software, and to permit persons to whom the Software is
  10. # furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. # SOFTWARE.
  22. from __future__ import unicode_literals
  23. from . import tokens
  24. from ..nodes import Heading, HTMLEntity, Tag, Template, Text
  25. from ..nodes.extras import Attribute, Parameter
  26. from ..smart_list import SmartList
  27. from ..wikicode import Wikicode
  28. from ..compat import str, bytes
  29. __all__ = ["Builder"]
  30. class Builder(object):
  31. def __init__(self):
  32. self._tokens = []
  33. self._stacks = []
  34. def _wrap(self, nodes):
  35. return Wikicode(SmartList(nodes))
  36. def _push(self):
  37. self._stacks.append([])
  38. def _pop(self, wrap=True):
  39. if wrap:
  40. return self._wrap(self._stacks.pop())
  41. return self._stacks.pop()
  42. def _write(self, item):
  43. self._stacks[-1].append(item)
  44. def _handle_parameter(self, default):
  45. key = None
  46. showkey = False
  47. self._push()
  48. while self._tokens:
  49. token = self._tokens.pop()
  50. if isinstance(token, tokens.TemplateParamEquals):
  51. key = self._pop()
  52. showkey = True
  53. self._push()
  54. elif isinstance(token, (tokens.TemplateParamSeparator,
  55. tokens.TemplateClose)):
  56. self._tokens.append(token)
  57. value = self._pop()
  58. if not key:
  59. key = self._wrap([Text(str(default))])
  60. return Parameter(key, value, showkey)
  61. else:
  62. self._write(self._handle_token(token))
  63. def _handle_template(self):
  64. params = []
  65. default = 1
  66. self._push()
  67. while self._tokens:
  68. token = self._tokens.pop()
  69. if isinstance(token, tokens.TemplateParamSeparator):
  70. if not params:
  71. name = self._pop()
  72. param = self._handle_parameter(default)
  73. params.append(param)
  74. if not param.showkey:
  75. default += 1
  76. elif isinstance(token, tokens.TemplateClose):
  77. if not params:
  78. name = self._pop()
  79. return Template(name, params)
  80. else:
  81. self._write(self._handle_token(token))
  82. def _handle_entity(self):
  83. token = self._tokens.pop()
  84. if isinstance(token, tokens.HTMLEntityNumeric):
  85. token = self._tokens.pop()
  86. if isinstance(token, tokens.HTMLEntityHex):
  87. text = self._tokens.pop()
  88. self._tokens.pop() # Remove HTMLEntityEnd
  89. return HTMLEntity(text.text, named=False, hexadecimal=True,
  90. hex_char=token.char)
  91. self._tokens.pop() # Remove HTMLEntityEnd
  92. return HTMLEntity(token.text, named=False, hexadecimal=False)
  93. self._tokens.pop() # Remove HTMLEntityEnd
  94. return HTMLEntity(token.text, named=True, hexadecimal=False)
  95. def _handle_heading(self, token):
  96. level = token.level
  97. self._push()
  98. while self._tokens:
  99. token = self._tokens.pop()
  100. if isinstance(token, tokens.HeadingEnd):
  101. title = self._pop()
  102. return Heading(title, level)
  103. else:
  104. self._write(self._handle_token(token))
  105. def _handle_attribute(self):
  106. name, quoted = None, False
  107. self._push()
  108. while self._tokens:
  109. token = self._tokens.pop()
  110. if isinstance(token, tokens.TagAttrEquals):
  111. name = self._pop()
  112. self._push()
  113. elif isinstance(token, tokens.TagAttrQuote):
  114. quoted = True
  115. elif isinstance(token, (tokens.TagAttrStart,
  116. tokens.TagCloseOpen)):
  117. self._tokens.append(token)
  118. if name is not None:
  119. return Attribute(name, self._pop(), quoted)
  120. return Attribute(self._pop(), quoted=quoted)
  121. else:
  122. self._write(self._handle_token(token))
  123. def _handle_tag(self, token):
  124. type_, showtag = token.type, token.showtag
  125. attrs = []
  126. self._push()
  127. while self._tokens:
  128. token = self._tokens.pop()
  129. if isinstance(token, tokens.TagAttrStart):
  130. attrs.append(self._handle_attribute())
  131. elif isinstance(token, tokens.TagCloseOpen):
  132. open_pad = token.padding
  133. tag = self._pop()
  134. self._push()
  135. elif isinstance(token, tokens.TagCloseSelfclose):
  136. tag = self._pop()
  137. return Tag(type_, tag, attrs=attrs, showtag=showtag,
  138. self_closing=True, open_padding=token.padding)
  139. elif isinstance(token, tokens.TagOpenClose):
  140. contents = self._pop()
  141. elif isinstance(token, tokens.TagCloseClose):
  142. return Tag(type_, tag, contents, attrs, showtag, False,
  143. open_pad, token.padding)
  144. else:
  145. self._write(self._handle_token(token))
  146. def _handle_token(self, token):
  147. if isinstance(token, tokens.Text):
  148. return Text(token.text)
  149. elif isinstance(token, tokens.TemplateOpen):
  150. return self._handle_template()
  151. elif isinstance(token, tokens.HTMLEntityStart):
  152. return self._handle_entity()
  153. elif isinstance(token, tokens.HeadingStart):
  154. return self._handle_heading(token)
  155. elif isinstance(token, tokens.TagOpenOpen):
  156. return self._handle_tag(token)
  157. def build(self, tokenlist):
  158. self._tokens = tokenlist
  159. self._tokens.reverse()
  160. self._push()
  161. while self._tokens:
  162. node = self._handle_token(self._tokens.pop())
  163. self._write(node)
  164. return self._pop()