A Python parser for MediaWiki wikicode https://mwparserfromhell.readthedocs.io/
Ви не можете вибрати більше 25 тем Теми мають розпочинатися з літери або цифри, можуть містити дефіси (-) і не повинні перевищувати 35 символів.
 
 
 
 

104 рядки
4.5 KiB

  1. # Copyright (C) 2012-2020 Ben Kurtovic <ben.kurtovic@gmail.com>
  2. #
  3. # Permission is hereby granted, free of charge, to any person obtaining a copy
  4. # of this software and associated documentation files (the "Software"), to deal
  5. # in the Software without restriction, including without limitation the rights
  6. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  7. # copies of the Software, and to permit persons to whom the Software is
  8. # furnished to do so, subject to the following conditions:
  9. #
  10. # The above copyright notice and this permission notice shall be included in
  11. # all copies or substantial portions of the Software.
  12. #
  13. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  18. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  19. # SOFTWARE.
  20. """
  21. This module contains the token definitions that are used as an intermediate
  22. parsing data type - they are stored in a flat list, with each token being
  23. identified by its type and optional attributes. The token list is generated in
  24. a syntactically valid form by the :class:`.Tokenizer`, and then converted into
  25. the :class:`.Wikicode` tree by the :class:`.Builder`.
  26. """
  27. __all__ = ["Token"]
  28. class Token(dict):
  29. """A token stores the semantic meaning of a unit of wikicode."""
  30. def __repr__(self):
  31. args = []
  32. for key, value in self.items():
  33. if isinstance(value, str) and len(value) > 100:
  34. args.append(key + "=" + repr(value[:97] + "..."))
  35. else:
  36. args.append(key + "=" + repr(value))
  37. return "{}({})".format(type(self).__name__, ", ".join(args))
  38. def __eq__(self, other):
  39. return isinstance(other, type(self)) and dict.__eq__(self, other)
  40. def __ne__(self, other):
  41. return not self.__eq__(other)
  42. def __getattr__(self, key):
  43. return self.get(key)
  44. def __setattr__(self, key, value):
  45. self[key] = value
  46. def __delattr__(self, key):
  47. del self[key]
  48. def make(name):
  49. """Create a new Token class using ``type()`` and add it to ``__all__``."""
  50. __all__.append(name)
  51. return type(name, (Token,), {})
  52. Text = make("Text")
  53. TemplateOpen = make("TemplateOpen") # {{
  54. TemplateParamSeparator = make("TemplateParamSeparator") # |
  55. TemplateParamEquals = make("TemplateParamEquals") # =
  56. TemplateClose = make("TemplateClose") # }}
  57. ArgumentOpen = make("ArgumentOpen") # {{{
  58. ArgumentSeparator = make("ArgumentSeparator") # |
  59. ArgumentClose = make("ArgumentClose") # }}}
  60. WikilinkOpen = make("WikilinkOpen") # [[
  61. WikilinkSeparator = make("WikilinkSeparator") # |
  62. WikilinkClose = make("WikilinkClose") # ]]
  63. ExternalLinkOpen = make("ExternalLinkOpen") # [
  64. ExternalLinkSeparator = make("ExternalLinkSeparator") #
  65. ExternalLinkClose = make("ExternalLinkClose") # ]
  66. HTMLEntityStart = make("HTMLEntityStart") # &
  67. HTMLEntityNumeric = make("HTMLEntityNumeric") # #
  68. HTMLEntityHex = make("HTMLEntityHex") # x
  69. HTMLEntityEnd = make("HTMLEntityEnd") # ;
  70. HeadingStart = make("HeadingStart") # =...
  71. HeadingEnd = make("HeadingEnd") # =...
  72. CommentStart = make("CommentStart") # <!--
  73. CommentEnd = make("CommentEnd") # -->
  74. TagOpenOpen = make("TagOpenOpen") # <
  75. TagAttrStart = make("TagAttrStart")
  76. TagAttrEquals = make("TagAttrEquals") # =
  77. TagAttrQuote = make("TagAttrQuote") # ", '
  78. TagCloseOpen = make("TagCloseOpen") # >
  79. TagCloseSelfclose = make("TagCloseSelfclose") # />
  80. TagOpenClose = make("TagOpenClose") # </
  81. TagCloseClose = make("TagCloseClose") # >
  82. del make