A Python parser for MediaWiki wikicode https://mwparserfromhell.readthedocs.io/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

преди 12 години
преди 12 години
преди 12 години
преди 12 години
преди 12 години
преди 12 години
преди 12 години
преди 12 години
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a copy
  6. # of this software and associated documentation files (the "Software"), to deal
  7. # in the Software without restriction, including without limitation the rights
  8. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. # copies of the Software, and to permit persons to whom the Software is
  10. # furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. # SOFTWARE.
  22. import htmlentitydefs
  23. import re
  24. import mwparserfromhell
  25. from mwparserfromhell.nodes import Node, Template, Text
  26. from mwparserfromhell.string_mixin import StringMixIn
  27. __all__ = ["Wikicode"]
  28. FLAGS = re.I | re.S | re.U
  29. class Wikicode(StringMixIn):
  30. def __init__(self, nodes):
  31. self._nodes = nodes
  32. def __unicode__(self):
  33. return "".join([unicode(node) for node in self.nodes])
  34. def _nodify(self, value):
  35. if isinstance(value, Wikicode):
  36. return value.nodes
  37. if isinstance(value, Node):
  38. return [value]
  39. if isinstance(value, str) or isinstance(value, unicode):
  40. return mwparserfromhell.parse(value).nodes
  41. error = "Needs string, Node, or Wikicode object, but got {0}: {1}"
  42. raise ValueError(error.format(type(value), value))
  43. def _get_children(self, node):
  44. yield node
  45. if isinstance(node, Template):
  46. for child in self._get_all_nodes(node.name):
  47. yield child
  48. for param in node.params:
  49. if param.showkey:
  50. for child in self._get_all_nodes(param.name):
  51. yield child
  52. for child in self._get_all_nodes(param.value):
  53. yield child
  54. def _get_all_nodes(self, code):
  55. for node in code.nodes:
  56. for child in self._get_children(node):
  57. yield child
  58. def _is_equivalent(self, obj, node):
  59. if isinstance(obj, Node):
  60. if node is obj:
  61. return True
  62. else:
  63. if node == obj:
  64. return True
  65. return False
  66. def _contains(self, nodes, obj):
  67. if isinstance(obj, Node):
  68. for node in nodes:
  69. if node is obj:
  70. return True
  71. else:
  72. if obj in nodes:
  73. return True
  74. return False
  75. def _do_search(self, obj, value, recursive, callback, context=None):
  76. if recursive:
  77. nodes = context.nodes if context else self.nodes
  78. for i, node in enumerate(nodes):
  79. if self._is_equivalent(obj, node):
  80. return callback(self, value, i)
  81. if self._contains(self._get_children(node), obj):
  82. return self._do_search(obj, value, recursive, callback,
  83. context=obj)
  84. raise ValueError(obj)
  85. callback(self, value, self.index(obj, recursive=False))
  86. def _get_tree(self, code, lines, marker=None, indent=0):
  87. def write(*args):
  88. if lines and lines[-1] is marker: # Continue from the last line
  89. lines.pop() # Remove the marker
  90. last = lines.pop()
  91. lines.append(last + " ".join(args))
  92. else:
  93. lines.append(" " * indent + " ".join(args))
  94. for node in code.nodes:
  95. if isinstance(node, Template):
  96. write("{{", )
  97. self._get_tree(node.name, lines, marker, indent + 1)
  98. for param in node.params:
  99. write(" | ")
  100. lines.append(marker) # Continue from this line
  101. self._get_tree(param.name, lines, marker, indent + 1)
  102. write(" = ")
  103. lines.append(marker) # Continue from this line
  104. self._get_tree(param.value, lines, marker, indent + 1)
  105. write("}}")
  106. elif isinstance(node, Text):
  107. write(unicode(node))
  108. else:
  109. raise NotImplementedError(node)
  110. return lines
  111. @property
  112. def nodes(self):
  113. return self._nodes
  114. def get(self, index):
  115. return self.nodes[index]
  116. def set(self, index, value):
  117. nodes = self._nodify(value)
  118. if len(nodes) > 1:
  119. raise ValueError("Cannot coerce multiple nodes into one index")
  120. if index >= len(self.nodes) or -1 * index > len(self.nodes):
  121. raise IndexError("List assignment index out of range")
  122. self.nodex.pop(index)
  123. if nodes:
  124. self.nodes[index] = nodes[0]
  125. def index(self, obj, recursive=False):
  126. if recursive:
  127. for i, node in enumerate(self.nodes):
  128. if self._contains(self._get_children(node), obj):
  129. return i
  130. raise ValueError(obj)
  131. for i, node in enumerate(self.nodes):
  132. if self._is_equivalent(obj, node):
  133. return i
  134. raise ValueError(obj)
  135. def insert(self, index, value):
  136. nodes = self._nodify(value)
  137. for node in reversed(nodes):
  138. self.nodes.insert(index, node)
  139. def insert_before(self, obj, value, recursive=True):
  140. callback = lambda self, value, i: self.insert(i, value)
  141. self._do_search(obj, value, recursive, callback)
  142. def insert_after(self, obj, value, recursive=True):
  143. callback = lambda self, value, i: self.insert(i + 1, value)
  144. self._do_search(obj, value, recursive, callback)
  145. def replace(self, obj, value, recursive=True):
  146. def callback(self, value, i):
  147. self.nodes.pop(i)
  148. self.insert(i, value)
  149. self._do_search(obj, value, recursive, callback)
  150. def append(self, value):
  151. nodes = self._nodify(value)
  152. for node in nodes:
  153. self.nodes.append(node)
  154. def remove(self, obj, recursive=True):
  155. if recursive:
  156. for i, node in enumerate(self.nodes):
  157. if self._is_equivalent(obj, node):
  158. return self.nodes.pop(i)
  159. if self._contains(self._get_children(node), obj):
  160. return node.remove(obj, recursive=True)
  161. raise ValueError(obj)
  162. return self.nodes.pop(self.index(obj))
  163. def ifilter(self, recursive=False, matches=None, flags=FLAGS,
  164. forcetype=None):
  165. if recursive:
  166. nodes = self._get_all_nodes(self)
  167. else:
  168. nodes = self.nodes
  169. for node in nodes:
  170. if not forcetype or isinstance(node, forcetype):
  171. if not matches or re.search(matches, unicode(node), flags):
  172. yield node
  173. def ifilter_templates(self, recursive=False, matches=None, flags=FLAGS):
  174. return self.filter(recursive, matches, flags, forcetype=Template)
  175. def ifilter_text(self, recursive=False, matches=None, flags=FLAGS):
  176. return self.filter(recursive, matches, flags, forcetype=Text)
  177. def filter(self, recursive=False, matches=None, flags=FLAGS,
  178. forcetype=None):
  179. return list(self.ifilter(recursive, matches, flags, forcetype))
  180. def filter_templates(self, recursive=False, matches=None, flags=FLAGS):
  181. return list(self.ifilter_templates(recursive, matches, flags))
  182. def filter_text(self, recursive=False, matches=None, flags=FLAGS):
  183. return list(self.ifilter_text(recursive, matches, flags))
  184. def strip_code(self, normalize=True):
  185. # Magic with htmlentitydefs if normalize
  186. return normalized(u" ".join(self.ifilter_text()))
  187. def get_tree(self):
  188. marker = object() # Random object we can find with certainty in a list
  189. return "\n".join(self._get_tree(self, [], marker))