A Python parser for MediaWiki wikicode https://mwparserfromhell.readthedocs.io/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

102 lines
3.3 KiB

  1. # Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
  2. #
  3. # Permission is hereby granted, free of charge, to any person obtaining a copy
  4. # of this software and associated documentation files (the "Software"), to deal
  5. # in the Software without restriction, including without limitation the rights
  6. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  7. # copies of the Software, and to permit persons to whom the Software is
  8. # furnished to do so, subject to the following conditions:
  9. #
  10. # The above copyright notice and this permission notice shall be included in
  11. # all copies or substantial portions of the Software.
  12. #
  13. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  18. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  19. # SOFTWARE.
  20. from ._base import Node
  21. from ..utils import parse_anything
  22. __all__ = ["Wikilink"]
  23. class Wikilink(Node):
  24. """Represents an internal wikilink, like ``[[Foo|Bar]]``."""
  25. # a list of links to strip:
  26. strip_links = ['File', 'Image', 'Media', # English
  27. 'Файл', 'Изображение', # Russian
  28. 'Detei', # German
  29. 'Fichier', # French
  30. 'Archivo', # Spanish
  31. 'Immagine', # Italiano
  32. 'Imagem', # Portuguese
  33. 'Plik', # Polish
  34. 'Berkas', # Indonesian
  35. 'Bestand', # Netherlands
  36. 'चित्र', # Hindi
  37. 'Payl', # Cebuano
  38. 'Paypay', # Waray
  39. 'Tập_tin', # Vietnamese
  40. 'ファイル', # Japanese
  41. # -- add here other start words of image wikilinks --
  42. ]
  43. def __init__(self, title, text=None):
  44. super().__init__()
  45. self.title = title
  46. self.text = text
  47. def __str__(self):
  48. if self.text is not None:
  49. return "[[" + str(self.title) + "|" + str(self.text) + "]]"
  50. return "[[" + str(self.title) + "]]"
  51. def __children__(self):
  52. yield self.title
  53. if self.text is not None:
  54. yield self.text
  55. def __strip__(self, **kwargs):
  56. _title = self.title.lstrip(':')
  57. for word in self.strip_links:
  58. if _title.startswith(word):
  59. return ''
  60. if self.text is not None:
  61. return self.text.strip_code(**kwargs)
  62. return self.title.strip_code(**kwargs)
  63. def __showtree__(self, write, get, mark):
  64. write("[[")
  65. get(self.title)
  66. if self.text is not None:
  67. write(" | ")
  68. mark()
  69. get(self.text)
  70. write("]]")
  71. @property
  72. def title(self):
  73. """The title of the linked page, as a :class:`.Wikicode` object."""
  74. return self._title
  75. @property
  76. def text(self):
  77. """The text to display (if any), as a :class:`.Wikicode` object."""
  78. return self._text
  79. @title.setter
  80. def title(self, value):
  81. self._title = parse_anything(value)
  82. @text.setter
  83. def text(self, value):
  84. if value is None:
  85. self._text = None
  86. else:
  87. self._text = parse_anything(value)