A Python parser for MediaWiki wikicode https://mwparserfromhell.readthedocs.io/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

119 regels
3.4 KiB

  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a copy
  6. # of this software and associated documentation files (the "Software"), to deal
  7. # in the Software without restriction, including without limitation the rights
  8. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. # copies of the Software, and to permit persons to whom the Software is
  10. # furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. # SOFTWARE.
  22. from __future__ import unicode_literals
  23. class TagDefinitions(object):
  24. """Contains numerical definitions for valid HTML (and wikicode) tags.
  25. Base class for :py:class:`~.Tag` objects.
  26. """
  27. TAG_UNKNOWN = 0
  28. # Basic HTML:
  29. TAG_ITALIC = 1
  30. TAG_BOLD = 2
  31. TAG_UNDERLINE = 3
  32. TAG_STRIKETHROUGH = 4
  33. TAG_UNORDERED_LIST = 5
  34. TAG_ORDERED_LIST = 6
  35. TAG_DEF_TERM = 7
  36. TAG_DEF_ITEM = 8
  37. TAG_BLOCKQUOTE = 9
  38. TAG_RULE = 10
  39. TAG_BREAK = 11
  40. TAG_ABBR = 12
  41. TAG_PRE = 13
  42. TAG_MONOSPACE = 14
  43. TAG_CODE = 15
  44. TAG_SPAN = 16
  45. TAG_DIV = 17
  46. TAG_FONT = 18
  47. TAG_SMALL = 19
  48. TAG_BIG = 20
  49. TAG_CENTER = 21
  50. # MediaWiki parser hooks:
  51. TAG_REF = 101
  52. TAG_GALLERY = 102
  53. TAG_MATH = 103
  54. TAG_NOWIKI = 104
  55. TAG_NOINCLUDE = 105
  56. TAG_INCLUDEONLY = 106
  57. TAG_ONLYINCLUDE = 107
  58. # Additional parser hooks:
  59. TAG_SYNTAXHIGHLIGHT = 201
  60. TAG_POEM = 202
  61. # Lists of tags:
  62. TAGS_ALL = set(range(300))
  63. TAGS_INVISIBLE = {TAG_REF, TAG_GALLERY, TAG_MATH, TAG_NOINCLUDE}
  64. TAGS_VISIBLE = TAGS_ALL - TAGS_INVISIBLE
  65. TRANSLATIONS = {
  66. "i": TAG_ITALIC,
  67. "em": TAG_ITALIC,
  68. "b": TAG_BOLD,
  69. "strong": TAG_BOLD,
  70. "u": TAG_UNDERLINE,
  71. "s": TAG_STRIKETHROUGH,
  72. "ul": TAG_UNORDERED_LIST,
  73. "ol": TAG_ORDERED_LIST,
  74. "dt": TAG_DEF_TERM,
  75. "dd": TAG_DEF_ITEM,
  76. "blockquote": TAG_BLOCKQUOTE,
  77. "hl": TAG_RULE,
  78. "br": TAG_BREAK,
  79. "abbr": TAG_ABBR,
  80. "pre": TAG_PRE,
  81. "tt": TAG_MONOSPACE,
  82. "code": TAG_CODE,
  83. "span": TAG_SPAN,
  84. "div": TAG_DIV,
  85. "font": TAG_FONT,
  86. "small": TAG_SMALL,
  87. "big": TAG_BIG,
  88. "center": TAG_CENTER,
  89. "ref": TAG_REF,
  90. "gallery": TAG_GALLERY,
  91. "math": TAG_MATH,
  92. "nowiki": TAG_NOWIKI,
  93. "noinclude": TAG_NOINCLUDE,
  94. "includeonly": TAG_INCLUDEONLY,
  95. "onlyinclude": TAG_ONLYINCLUDE,
  96. "syntaxhighlight": TAG_SYNTAXHIGHLIGHT,
  97. "source": TAG_SYNTAXHIGHLIGHT,
  98. "poem": TAG_POEM,
  99. }
  100. WIKICODE = {
  101. TAG_ITALIC: ("''", "''"),
  102. TAG_BOLD: ("'''", "'''"),
  103. TAG_UNORDERED_LIST: ("*", ""),
  104. TAG_ORDERED_LIST: ("#", ""),
  105. TAG_DEF_TERM: (";", ""),
  106. TAG_DEF_ITEM: (":", ""),
  107. TAG_RULE: ("----", ""),
  108. }