A Python parser for MediaWiki wikicode https://mwparserfromhell.readthedocs.io/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

106 lines
4.5 KiB

  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2012-2019 Ben Kurtovic <ben.kurtovic@gmail.com>
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a copy
  6. # of this software and associated documentation files (the "Software"), to deal
  7. # in the Software without restriction, including without limitation the rights
  8. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. # copies of the Software, and to permit persons to whom the Software is
  10. # furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. # SOFTWARE.
  22. from __future__ import unicode_literals
  23. from . import Node
  24. from ..compat import str
  25. from ..utils import parse_anything
# Public names of this module: only the Wikilink node class is exported by
# a star-import.
__all__ = ["Wikilink"]
  27. TECHNICAL = set(
  28. "en ceb sv de fr nl ru it es pl war vi ja zh pt ar uk fa sr ca no id ko fi hu sh cs ro eu tr ms eo hy bg ce da he sk zh-min-nan kk min hr et lt be el sl gl azb az nn simple ur th hi ka uz la ta vo cy mk ast tg lv mg tt oc af bs ky sq tl bn zh-yue new te be-tarask br ml pms su nds lb jv ht mr sco szl sw ga ba pnb is my fy cv lmo an ne yo pa bar io gu als ku scn kn bpy ckb wuu ia arz qu mn bat-smg si wa cdo or yi am gd nap bug ilo mai hsb map-bms fo xmf mzn li vec sd eml sah os diq sa ps mrj mhr zh-classical hif nv roa-tara bcl ace hak frr pam nso km se rue mi vls nah bh nds-nl crh gan vep sc ab as bo glk myv co so tk fiu-vro lrc csb kv gv sn udm zea ay ie pcd nrm kab ug stq lez ha kw mwl gom haw gn rm lij lfn lad lo koi mt frp fur dsb dty ext ang ln olo cbk-zam dv bjn ksh gag pi pfl pag av bxr gor xal krc za pap kaa pdc tyv rw to kl nov jam arc kbp kbd tpi tet ig sat ki zu wo na jbo roa-rup lbe bi ty mdf kg lg tcy srn inh xh atj ltg chr sm pih om ak tn cu ts tw rmy bm st chy rn got tum ny ss ch pnt fj iu ady ve ee ks ik sg ff dz ti din cr ng cho kj mh ho ii aa mus hz kr shn hyw".split() + ["category", "file"])
  29. NOT_CAPTION = [
  30. "thumb", "frame", "border", "right", "left", "center", "none",
  31. "baseline", "middle", "sub", "super", "text-top", "text-bottom", "top", "bottom",
  32. "upright"
  33. ]
  34. class Wikilink(Node):
  35. """Represents an internal wikilink, like ``[[Foo|Bar]]``."""
  36. def __init__(self, title, text=None):
  37. super(Wikilink, self).__init__()
  38. self.title = title
  39. self.text = text
  40. def __unicode__(self):
  41. if self.text is not None:
  42. return "[[" + str(self.title) + "|" + str(self.text) + "]]"
  43. return "[[" + str(self.title) + "]]"
  44. def __children__(self):
  45. yield self.title
  46. if self.text is not None:
  47. yield self.text
  48. def __strip__(self, **kwargs):
  49. special_link_name = self.title.partition(":")[0].lower().strip()
  50. if special_link_name in TECHNICAL:
  51. return ""
  52. if special_link_name == "image" and self.text:
  53. caption_parts = []
  54. for entry in self.text.split("|"):
  55. if entry[-2:] == "px":
  56. continue
  57. if any(entry.startswith(prefix) for prefix in NOT_CAPTION):
  58. continue
  59. caption_parts.append(entry)
  60. caption = "|".join(caption_parts)
  61. return parse_anything(caption).strip_code(**kwargs)
  62. if self.text is not None:
  63. return self.text.strip_code(**kwargs)
  64. return self.title.strip_code(**kwargs)
  65. def __showtree__(self, write, get, mark):
  66. write("[[")
  67. get(self.title)
  68. if self.text is not None:
  69. write(" | ")
  70. mark()
  71. get(self.text)
  72. write("]]")
  73. @property
  74. def title(self):
  75. """The title of the linked page, as a :class:`.Wikicode` object."""
  76. return self._title
  77. @property
  78. def text(self):
  79. """The text to display (if any), as a :class:`.Wikicode` object."""
  80. return self._text
  81. @title.setter
  82. def title(self, value):
  83. self._title = parse_anything(value)
  84. @text.setter
  85. def text(self, value):
  86. if value is None:
  87. self._text = None
  88. else:
  89. self._text = parse_anything(value)