A Python parser for MediaWiki wikicode https://mwparserfromhell.readthedocs.io/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

106 lines
3.4 KiB

  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2012-2016 Ben Kurtovic <ben.kurtovic@gmail.com>
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a copy
  6. # of this software and associated documentation files (the "Software"), to deal
  7. # in the Software without restriction, including without limitation the rights
  8. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. # copies of the Software, and to permit persons to whom the Software is
  10. # furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. # SOFTWARE.
  22. from __future__ import unicode_literals
  23. from . import Node
  24. from ..compat import str
  25. from ..utils import parse_anything
  26. __all__ = ["Wikilink"]
  27. class Wikilink(Node):
  28. """Represents an internal wikilink, like ``[[Foo|Bar]]``."""
  29. # a list of links to strip:
  30. strip_links = ['File', 'Image', 'Media', # English
  31. 'Файл', 'Изображение', # Russian
  32. 'Detei', # German
  33. 'Fichier', # French
  34. 'Archivo', # Spanish
  35. 'Immagine', # Italiano
  36. 'Imagem', # Portuguese
  37. 'Plik', # Polish
  38. 'Berkas', # Indonesian
  39. 'Bestand', # Netherlands
  40. 'चित्र', # Hindi
  41. 'Payl', # Cebuano
  42. 'Paypay', # Waray
  43. 'Tập_tin', # Vietnamese
  44. 'ファイル', # Japanese
  45. # -- add here other start words of image wikilinks --
  46. ]
  47. def __init__(self, title, text=None):
  48. super(Wikilink, self).__init__()
  49. self._title = title
  50. self._text = text
  51. def __unicode__(self):
  52. if self.text is not None:
  53. return "[[" + str(self.title) + "|" + str(self.text) + "]]"
  54. return "[[" + str(self.title) + "]]"
  55. def __children__(self):
  56. yield self.title
  57. if self.text is not None:
  58. yield self.text
  59. def __strip__(self, **kwargs):
  60. _title = self.title.lstrip(':')
  61. for word in self.strip_links:
  62. if _title.startswith(word):
  63. return ''
  64. if self.text is not None:
  65. return self.text.strip_code(**kwargs)
  66. return self.title.strip_code(**kwargs)
  67. def __showtree__(self, write, get, mark):
  68. write("[[")
  69. get(self.title)
  70. if self.text is not None:
  71. write(" | ")
  72. mark()
  73. get(self.text)
  74. write("]]")
  75. @property
  76. def title(self):
  77. """The title of the linked page, as a :class:`.Wikicode` object."""
  78. return self._title
  79. @property
  80. def text(self):
  81. """The text to display (if any), as a :class:`.Wikicode` object."""
  82. return self._text
  83. @title.setter
  84. def title(self, value):
  85. self._title = parse_anything(value)
  86. @text.setter
  87. def text(self, value):
  88. if value is None:
  89. self._text = None
  90. else:
  91. self._text = parse_anything(value)