A Python parser for MediaWiki wikicode https://mwparserfromhell.readthedocs.io/
Não pode escolher mais do que 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.
 
 
 
 

597 linhas
27 KiB

  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2012-2014 Ben Kurtovic <ben.kurtovic@gmail.com>
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a copy
  6. # of this software and associated documentation files (the "Software"), to deal
  7. # in the Software without restriction, including without limitation the rights
  8. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. # copies of the Software, and to permit persons to whom the Software is
  10. # furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. # SOFTWARE.
  22. from __future__ import unicode_literals
  23. from itertools import chain
  24. import re
  25. from .compat import py3k, range, str
  26. from .nodes import (Argument, Comment, ExternalLink, Heading, HTMLEntity,
  27. Node, Tag, Template, Text, Wikilink)
  28. from .string_mixin import StringMixIn
  29. from .utils import parse_anything
  30. __all__ = ["Wikicode"]
  31. FLAGS = re.IGNORECASE | re.DOTALL | re.UNICODE
  32. class Wikicode(StringMixIn):
  33. """A ``Wikicode`` is a container for nodes that operates like a string.
  34. Additionally, it contains methods that can be used to extract data from or
  35. modify the nodes, implemented in an interface similar to a list. For
  36. example, :py:meth:`index` can get the index of a node in the list, and
  37. :py:meth:`insert` can add a new node at that index. The :py:meth:`filter()
  38. <ifilter>` series of functions is very useful for extracting and iterating
  39. over, for example, all of the templates in the object.
  40. """
  41. RECURSE_OTHERS = 2
  42. def __init__(self, nodes):
  43. super(Wikicode, self).__init__()
  44. self._nodes = nodes
  45. def __unicode__(self):
  46. return "".join([str(node) for node in self.nodes])
  47. @staticmethod
  48. def _get_children(node, contexts=False, restrict=None, parent=None):
  49. """Iterate over all child :py:class:`.Node`\ s of a given *node*."""
  50. yield (parent, node) if contexts else node
  51. if restrict and isinstance(node, restrict):
  52. return
  53. for code in node.__children__():
  54. for child in code.nodes:
  55. sub = Wikicode._get_children(child, contexts, restrict, code)
  56. for result in sub:
  57. yield result
  58. @staticmethod
  59. def _slice_replace(code, index, old, new):
  60. """Replace the string *old* with *new* across *index* in *code*."""
  61. nodes = [str(node) for node in code.get(index)]
  62. substring = "".join(nodes).replace(old, new)
  63. code.nodes[index] = parse_anything(substring).nodes
  64. @staticmethod
  65. def _build_matcher(matches, flags):
  66. """Helper for :py:meth:`_indexed_ifilter` and others.
  67. If *matches* is a function, return it. If it's a regex, return a
  68. wrapper around it that can be called with a node to do a search. If
  69. it's ``None``, return a function that always returns ``True``.
  70. """
  71. if matches:
  72. if callable(matches):
  73. return matches
  74. return lambda obj: re.search(matches, str(obj), flags)
  75. return lambda obj: True
  76. def _indexed_ifilter(self, recursive=True, matches=None, flags=FLAGS,
  77. forcetype=None):
  78. """Iterate over nodes and their corresponding indices in the node list.
  79. The arguments are interpreted as for :py:meth:`ifilter`. For each tuple
  80. ``(i, node)`` yielded by this method, ``self.index(node) == i``. Note
  81. that if *recursive* is ``True``, ``self.nodes[i]`` might not be the
  82. node itself, but will still contain it.
  83. """
  84. match = self._build_matcher(matches, flags)
  85. if recursive:
  86. restrict = forcetype if recursive == self.RECURSE_OTHERS else None
  87. def getter(i, node):
  88. for ch in self._get_children(node, restrict=restrict):
  89. yield (i, ch)
  90. inodes = chain(*(getter(i, n) for i, n in enumerate(self.nodes)))
  91. else:
  92. inodes = enumerate(self.nodes)
  93. for i, node in inodes:
  94. if (not forcetype or isinstance(node, forcetype)) and match(node):
  95. yield (i, node)
  96. def _do_strong_search(self, obj, recursive=True):
  97. """Search for the specific element *obj* within the node list.
  98. *obj* can be either a :py:class:`.Node` or a :py:class:`.Wikicode`
  99. object. If found, we return a tuple (*context*, *index*) where
  100. *context* is the :py:class:`.Wikicode` that contains *obj* and *index*
  101. is its index there, as a :py:class:`slice`. Note that if *recursive* is
  102. ``False``, *context* will always be ``self`` (since we only look for
  103. *obj* among immediate descendants), but if *recursive* is ``True``,
  104. then it could be any :py:class:`.Wikicode` contained by a node within
  105. ``self``. If *obj* is not found, :py:exc:`ValueError` is raised.
  106. """
  107. if isinstance(obj, Node):
  108. mkslice = lambda i: slice(i, i + 1)
  109. if not recursive:
  110. return self, mkslice(self.index(obj))
  111. for i, node in enumerate(self.nodes):
  112. for context, child in self._get_children(node, contexts=True):
  113. if obj is child:
  114. if not context:
  115. context = self
  116. return context, mkslice(context.index(child))
  117. raise ValueError(obj)
  118. context, ind = self._do_strong_search(obj.get(0), recursive)
  119. for i in range(1, len(obj.nodes)):
  120. if obj.get(i) is not context.get(ind.start + i):
  121. raise ValueError(obj)
  122. return context, slice(ind.start, ind.start + len(obj.nodes))
  123. def _do_weak_search(self, obj, recursive):
  124. """Search for an element that looks like *obj* within the node list.
  125. This follows the same rules as :py:meth:`_do_strong_search` with some
  126. differences. *obj* is treated as a string that might represent any
  127. :py:class:`.Node`, :py:class:`.Wikicode`, or combination of the two
  128. present in the node list. Thus, matching is weak (using string
  129. comparisons) rather than strong (using ``is``). Because multiple nodes
  130. can match *obj*, the result is a list of tuples instead of just one
  131. (however, :py:exc:`ValueError` is still raised if nothing is found).
  132. Individual matches will never overlap.
  133. The tuples contain a new first element, *exact*, which is ``True`` if
  134. we were able to match *obj* exactly to one or more adjacent nodes, or
  135. ``False`` if we found *obj* inside a node or incompletely spanning
  136. multiple nodes.
  137. """
  138. obj = parse_anything(obj)
  139. if not obj or obj not in self:
  140. raise ValueError(obj)
  141. results = []
  142. contexts = [self]
  143. while contexts:
  144. context = contexts.pop()
  145. i = len(context.nodes) - 1
  146. while i >= 0:
  147. node = context.get(i)
  148. if obj.get(-1) == node:
  149. for j in range(-len(obj.nodes), -1):
  150. if obj.get(j) != context.get(i + j + 1):
  151. break
  152. else:
  153. i -= len(obj.nodes) - 1
  154. index = slice(i, i + len(obj.nodes))
  155. results.append((True, context, index))
  156. elif recursive and obj in node:
  157. contexts.extend(node.__children__())
  158. i -= 1
  159. if not results:
  160. if not recursive:
  161. raise ValueError(obj)
  162. results.append((False, self, slice(0, len(self.nodes))))
  163. return results
  164. def _get_tree(self, code, lines, marker, indent):
  165. """Build a tree to illustrate the way the Wikicode object was parsed.
  166. The method that builds the actual tree is ``__showtree__`` of ``Node``
  167. objects. *code* is the ``Wikicode`` object to build a tree for. *lines*
  168. is the list to append the tree to, which is returned at the end of the
  169. method. *marker* is some object to be used to indicate that the builder
  170. should continue on from the last line instead of starting a new one; it
  171. should be any object that can be tested for with ``is``. *indent* is
  172. the starting indentation.
  173. """
  174. def write(*args):
  175. """Write a new line following the proper indentation rules."""
  176. if lines and lines[-1] is marker: # Continue from the last line
  177. lines.pop() # Remove the marker
  178. last = lines.pop()
  179. lines.append(last + " ".join(args))
  180. else:
  181. lines.append(" " * 6 * indent + " ".join(args))
  182. get = lambda code: self._get_tree(code, lines, marker, indent + 1)
  183. mark = lambda: lines.append(marker)
  184. for node in code.nodes:
  185. node.__showtree__(write, get, mark)
  186. return lines
  187. @classmethod
  188. def _build_filter_methods(cls, **meths):
  189. """Given Node types, build the corresponding i?filter shortcuts.
  190. The should be given as keys storing the method's base name paired
  191. with values storing the corresponding :py:class:`~.Node` type. For
  192. example, the dict may contain the pair ``("templates", Template)``,
  193. which will produce the methods :py:meth:`ifilter_templates` and
  194. :py:meth:`filter_templates`, which are shortcuts for
  195. :py:meth:`ifilter(forcetype=Template) <ifilter>` and
  196. :py:meth:`filter(forcetype=Template) <filter>`, respectively. These
  197. shortcuts are added to the class itself, with an appropriate docstring.
  198. """
  199. doc = """Iterate over {0}.
  200. This is equivalent to :py:meth:`{1}` with *forcetype* set to
  201. :py:class:`~{2.__module__}.{2.__name__}`.
  202. """
  203. make_ifilter = lambda ftype: (lambda self, *a, **kw:
  204. self.ifilter(forcetype=ftype, *a, **kw))
  205. make_filter = lambda ftype: (lambda self, *a, **kw:
  206. self.filter(forcetype=ftype, *a, **kw))
  207. for name, ftype in (meths.items() if py3k else meths.iteritems()):
  208. ifilter = make_ifilter(ftype)
  209. filter = make_filter(ftype)
  210. ifilter.__doc__ = doc.format(name, "ifilter", ftype)
  211. filter.__doc__ = doc.format(name, "filter", ftype)
  212. setattr(cls, "ifilter_" + name, ifilter)
  213. setattr(cls, "filter_" + name, filter)
  214. @property
  215. def nodes(self):
  216. """A list of :py:class:`~.Node` objects.
  217. This is the internal data actually stored within a
  218. :py:class:`~.Wikicode` object.
  219. """
  220. return self._nodes
  221. @nodes.setter
  222. def nodes(self, value):
  223. if not isinstance(value, list):
  224. value = parse_anything(value).nodes
  225. self._nodes = value
  226. def get(self, index):
  227. """Return the *index*\ th node within the list of nodes."""
  228. return self.nodes[index]
  229. def set(self, index, value):
  230. """Set the ``Node`` at *index* to *value*.
  231. Raises :py:exc:`IndexError` if *index* is out of range, or
  232. :py:exc:`ValueError` if *value* cannot be coerced into one
  233. :py:class:`~.Node`. To insert multiple nodes at an index, use
  234. :py:meth:`get` with either :py:meth:`remove` and :py:meth:`insert` or
  235. :py:meth:`replace`.
  236. """
  237. nodes = parse_anything(value).nodes
  238. if len(nodes) > 1:
  239. raise ValueError("Cannot coerce multiple nodes into one index")
  240. if index >= len(self.nodes) or -1 * index > len(self.nodes):
  241. raise IndexError("List assignment index out of range")
  242. if nodes:
  243. self.nodes[index] = nodes[0]
  244. else:
  245. self.nodes.pop(index)
  246. def index(self, obj, recursive=False):
  247. """Return the index of *obj* in the list of nodes.
  248. Raises :py:exc:`ValueError` if *obj* is not found. If *recursive* is
  249. ``True``, we will look in all nodes of ours and their descendants, and
  250. return the index of our direct descendant node within *our* list of
  251. nodes. Otherwise, the lookup is done only on direct descendants.
  252. """
  253. strict = isinstance(obj, Node)
  254. equivalent = (lambda o, n: o is n) if strict else (lambda o, n: o == n)
  255. for i, node in enumerate(self.nodes):
  256. if recursive:
  257. for child in self._get_children(node):
  258. if equivalent(obj, child):
  259. return i
  260. elif equivalent(obj, node):
  261. return i
  262. raise ValueError(obj)
  263. def insert(self, index, value):
  264. """Insert *value* at *index* in the list of nodes.
  265. *value* can be anything parsable by :py:func:`.parse_anything`, which
  266. includes strings or other :py:class:`~.Wikicode` or :py:class:`~.Node`
  267. objects.
  268. """
  269. nodes = parse_anything(value).nodes
  270. for node in reversed(nodes):
  271. self.nodes.insert(index, node)
  272. def insert_before(self, obj, value, recursive=True):
  273. """Insert *value* immediately before *obj*.
  274. *obj* can be either a string, a :py:class:`~.Node`, or another
  275. :py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`,
  276. for example). If *obj* is a string, we will operate on all instances
  277. of that string within the code, otherwise only on the specific instance
  278. given. *value* can be anything parsable by :py:func:`.parse_anything`.
  279. If *recursive* is ``True``, we will try to find *obj* within our child
  280. nodes even if it is not a direct descendant of this
  281. :py:class:`~.Wikicode` object. If *obj* is not found,
  282. :py:exc:`ValueError` is raised.
  283. """
  284. if isinstance(obj, (Node, Wikicode)):
  285. context, index = self._do_strong_search(obj, recursive)
  286. context.insert(index.start, value)
  287. else:
  288. for exact, context, index in self._do_weak_search(obj, recursive):
  289. if exact:
  290. context.insert(index.start, value)
  291. else:
  292. obj = str(obj)
  293. self._slice_replace(context, index, obj, str(value) + obj)
  294. def insert_after(self, obj, value, recursive=True):
  295. """Insert *value* immediately after *obj*.
  296. *obj* can be either a string, a :py:class:`~.Node`, or another
  297. :py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`,
  298. for example). If *obj* is a string, we will operate on all instances
  299. of that string within the code, otherwise only on the specific instance
  300. given. *value* can be anything parsable by :py:func:`.parse_anything`.
  301. If *recursive* is ``True``, we will try to find *obj* within our child
  302. nodes even if it is not a direct descendant of this
  303. :py:class:`~.Wikicode` object. If *obj* is not found,
  304. :py:exc:`ValueError` is raised.
  305. """
  306. if isinstance(obj, (Node, Wikicode)):
  307. context, index = self._do_strong_search(obj, recursive)
  308. context.insert(index.stop, value)
  309. else:
  310. for exact, context, index in self._do_weak_search(obj, recursive):
  311. if exact:
  312. context.insert(index.stop, value)
  313. else:
  314. obj = str(obj)
  315. self._slice_replace(context, index, obj, obj + str(value))
  316. def replace(self, obj, value, recursive=True):
  317. """Replace *obj* with *value*.
  318. *obj* can be either a string, a :py:class:`~.Node`, or another
  319. :py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`,
  320. for example). If *obj* is a string, we will operate on all instances
  321. of that string within the code, otherwise only on the specific instance
  322. given. *value* can be anything parsable by :py:func:`.parse_anything`.
  323. If *recursive* is ``True``, we will try to find *obj* within our child
  324. nodes even if it is not a direct descendant of this
  325. :py:class:`~.Wikicode` object. If *obj* is not found,
  326. :py:exc:`ValueError` is raised.
  327. """
  328. if isinstance(obj, (Node, Wikicode)):
  329. context, index = self._do_strong_search(obj, recursive)
  330. for i in range(index.start, index.stop):
  331. context.nodes.pop(index.start)
  332. context.insert(index.start, value)
  333. else:
  334. for exact, context, index in self._do_weak_search(obj, recursive):
  335. if exact:
  336. for i in range(index.start, index.stop):
  337. context.nodes.pop(index.start)
  338. context.insert(index.start, value)
  339. else:
  340. self._slice_replace(context, index, str(obj), str(value))
  341. def append(self, value):
  342. """Insert *value* at the end of the list of nodes.
  343. *value* can be anything parsable by :py:func:`.parse_anything`.
  344. """
  345. nodes = parse_anything(value).nodes
  346. for node in nodes:
  347. self.nodes.append(node)
  348. def remove(self, obj, recursive=True):
  349. """Remove *obj* from the list of nodes.
  350. *obj* can be either a string, a :py:class:`~.Node`, or another
  351. :py:class:`~.Wikicode` object (as created by :py:meth:`get_sections`,
  352. for example). If *obj* is a string, we will operate on all instances
  353. of that string within the code, otherwise only on the specific instance
  354. given. If *recursive* is ``True``, we will try to find *obj* within our
  355. child nodes even if it is not a direct descendant of this
  356. :py:class:`~.Wikicode` object. If *obj* is not found,
  357. :py:exc:`ValueError` is raised.
  358. """
  359. if isinstance(obj, (Node, Wikicode)):
  360. context, index = self._do_strong_search(obj, recursive)
  361. for i in range(index.start, index.stop):
  362. context.nodes.pop(index.start)
  363. else:
  364. for exact, context, index in self._do_weak_search(obj, recursive):
  365. if exact:
  366. for i in range(index.start, index.stop):
  367. context.nodes.pop(index.start)
  368. else:
  369. self._slice_replace(context, index, str(obj), "")
  370. def matches(self, other):
  371. """Do a loose equivalency test suitable for comparing page names.
  372. *other* can be any string-like object, including
  373. :py:class:`~.Wikicode`, or a tuple of these. This operation is
  374. symmetric; both sides are adjusted. Specifically, whitespace and markup
  375. is stripped and the first letter's case is normalized. Typical usage is
  376. ``if template.name.matches("stub"): ...``.
  377. """
  378. cmp = lambda a, b: (a[0].upper() + a[1:] == b[0].upper() + b[1:]
  379. if a and b else a == b)
  380. this = self.strip_code().strip()
  381. if isinstance(other, (tuple, list)):
  382. for obj in other:
  383. that = parse_anything(obj).strip_code().strip()
  384. if cmp(this, that):
  385. return True
  386. return False
  387. that = parse_anything(other).strip_code().strip()
  388. return cmp(this, that)
  389. def ifilter(self, recursive=True, matches=None, flags=FLAGS,
  390. forcetype=None):
  391. """Iterate over nodes in our list matching certain conditions.
  392. If *forcetype* is given, only nodes that are instances of this type (or
  393. tuple of types) are yielded. Setting *recursive* to ``True`` will
  394. iterate over all children and their descendants. ``RECURSE_OTHERS``
  395. will only iterate over children that are not the instances of
  396. *forcetype*. ``False`` will only iterate over immediate children.
  397. ``RECURSE_OTHERS`` can be used to iterate over all un-nested templates,
  398. even if they are inside of HTML tags, like so:
  399. >>> code = mwparserfromhell.parse("{{foo}}<b>{{foo|{{bar}}}}</b>")
  400. >>> code.filter_templates(code.RECURSE_OTHERS)
  401. ["{{foo}}", "{{foo|{{bar}}}}"]
  402. *matches* can be used to further restrict the nodes, either as a
  403. function (taking a single :py:class:`.Node` and returning a boolean) or
  404. a regular expression (matched against the node's string representation
  405. with :py:func:`re.search`). If *matches* is a regex, the flags passed
  406. to :py:func:`re.search` are :py:const:`re.IGNORECASE`,
  407. :py:const:`re.DOTALL`, and :py:const:`re.UNICODE`, but custom flags can
  408. be specified by passing *flags*.
  409. """
  410. gen = self._indexed_ifilter(recursive, matches, flags, forcetype)
  411. return (node for i, node in gen)
  412. def filter(self, *args, **kwargs):
  413. """Return a list of nodes within our list matching certain conditions.
  414. This is equivalent to calling :py:func:`list` on :py:meth:`ifilter`.
  415. """
  416. return list(self.ifilter(*args, **kwargs))
  417. def get_sections(self, levels=None, matches=None, flags=FLAGS, flat=False,
  418. include_lead=None, include_headings=True):
  419. """Return a list of sections within the page.
  420. Sections are returned as :py:class:`~.Wikicode` objects with a shared
  421. node list (implemented using :py:class:`~.SmartList`) so that changes
  422. to sections are reflected in the parent Wikicode object.
  423. Each section contains all of its subsections, unless *flat* is
  424. ``True``. If *levels* is given, it should be a iterable of integers;
  425. only sections whose heading levels are within it will be returned. If
  426. *matches* is given, it should be either a function or a regex; only
  427. sections whose headings match it (without the surrounding equal signs)
  428. will be included. *flags* can be used to override the default regex
  429. flags (see :py:meth:`ifilter`) if a regex *matches* is used.
  430. If *include_lead* is ``True``, the first, lead section (without a
  431. heading) will be included in the list; ``False`` will not include it;
  432. the default will include it only if no specific *levels* were given. If
  433. *include_headings* is ``True``, the section's beginning
  434. :py:class:`~.Heading` object will be included; otherwise, this is
  435. skipped.
  436. """
  437. title_matcher = self._build_matcher(matches, flags)
  438. matcher = lambda heading: (title_matcher(heading.title) and
  439. (not levels or heading.level in levels))
  440. iheadings = self._indexed_ifilter(recursive=False, forcetype=Heading)
  441. sections = [] # Tuples of (index_of_first_node, section)
  442. open_headings = [] # Tuples of (index, heading), where index and
  443. # heading.level are both monotonically increasing
  444. # Add the lead section if appropriate:
  445. if include_lead or not (include_lead is not None or matches or levels):
  446. itr = self._indexed_ifilter(recursive=False, forcetype=Heading)
  447. try:
  448. first = next(itr)[0]
  449. sections.append((0, Wikicode(self.nodes[:first])))
  450. except StopIteration: # No headings in page
  451. sections.append((0, Wikicode(self.nodes[:])))
  452. # Iterate over headings, adding sections to the list as they end:
  453. for i, heading in iheadings:
  454. if flat: # With flat, all sections close at the next heading
  455. newly_closed, open_headings = open_headings, []
  456. else: # Otherwise, figure out which sections have closed, if any
  457. closed_start_index = len(open_headings)
  458. for j, (start, last_heading) in enumerate(open_headings):
  459. if heading.level <= last_heading.level:
  460. closed_start_index = j
  461. break
  462. newly_closed = open_headings[closed_start_index:]
  463. del open_headings[closed_start_index:]
  464. for start, closed_heading in newly_closed:
  465. if matcher(closed_heading):
  466. sections.append((start, Wikicode(self.nodes[start:i])))
  467. start = i if include_headings else (i + 1)
  468. open_headings.append((start, heading))
  469. # Add any remaining open headings to the list of sections:
  470. for start, heading in open_headings:
  471. if matcher(heading):
  472. sections.append((start, Wikicode(self.nodes[start:])))
  473. # Ensure that earlier sections are earlier in the returned list:
  474. return [section for i, section in sorted(sections)]
  475. def strip_code(self, normalize=True, collapse=True):
  476. """Return a rendered string without unprintable code such as templates.
  477. The way a node is stripped is handled by the
  478. :py:meth:`~.Node.__strip__` method of :py:class:`~.Node` objects, which
  479. generally return a subset of their nodes or ``None``. For example,
  480. templates and tags are removed completely, links are stripped to just
  481. their display part, headings are stripped to just their title. If
  482. *normalize* is ``True``, various things may be done to strip code
  483. further, such as converting HTML entities like ``&Sigma;``, ``&#931;``,
  484. and ``&#x3a3;`` to ``Σ``. If *collapse* is ``True``, we will try to
  485. remove excess whitespace as well (three or more newlines are converted
  486. to two, for example).
  487. """
  488. nodes = []
  489. for node in self.nodes:
  490. stripped = node.__strip__(normalize, collapse)
  491. if stripped:
  492. nodes.append(str(stripped))
  493. if collapse:
  494. stripped = "".join(nodes).strip("\n")
  495. while "\n\n\n" in stripped:
  496. stripped = stripped.replace("\n\n\n", "\n\n")
  497. return stripped
  498. else:
  499. return "".join(nodes)
  500. def get_tree(self):
  501. """Return a hierarchical tree representation of the object.
  502. The representation is a string makes the most sense printed. It is
  503. built by calling :py:meth:`_get_tree` on the
  504. :py:class:`~.Wikicode` object and its children recursively. The end
  505. result may look something like the following::
  506. >>> text = "Lorem ipsum {{foo|bar|{{baz}}|spam=eggs}}"
  507. >>> print mwparserfromhell.parse(text).get_tree()
  508. Lorem ipsum
  509. {{
  510. foo
  511. | 1
  512. = bar
  513. | 2
  514. = {{
  515. baz
  516. }}
  517. | spam
  518. = eggs
  519. }}
  520. """
  521. marker = object() # Random object we can find with certainty in a list
  522. return "\n".join(self._get_tree(self, [], marker, 0))
  523. Wikicode._build_filter_methods(
  524. arguments=Argument, comments=Comment, external_links=ExternalLink,
  525. headings=Heading, html_entities=HTMLEntity, tags=Tag, templates=Template,
  526. text=Text, wikilinks=Wikilink)