A Python parser for MediaWiki wikicode https://mwparserfromhell.readthedocs.io/
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

98 lines
4.0 KiB

  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2012 Ben Kurtovic <ben.kurtovic@verizon.net>
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a copy
  6. # of this software and associated documentation files (the "Software"), to deal
  7. # in the Software without restriction, including without limitation the rights
  8. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. # copies of the Software, and to permit persons to whom the Software is
  10. # furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. # SOFTWARE.
  22. """
  23. This module contains the token definitions that are used as an intermediate
  24. parsing data type - they are stored in a flat list, with each token being
  25. identified by its type and optional attributes. The token list is generated in
  26. a syntactically valid form by the
  27. :py:class:`~mwparserfromhell.parser.tokenizer.Tokenizer`, and then converted
  28. into the :py:class:`~mwparserfromhell.wikicode.Wikicode` tree by the
  29. :py:class:`~mwparserfromhell.parser.builder.Builder`.
  30. """
  31. from __future__ import unicode_literals
  32. from ..compat import basestring, py3k
  33. __all__ = ["Token"]
  34. class Token(object):
  35. """A token stores the semantic meaning of a unit of wikicode."""
  36. def __init__(self, **kwargs):
  37. super(Token, self).__setattr__("_kwargs", kwargs)
  38. def __repr__(self):
  39. args = []
  40. for key, value in self._kwargs.items():
  41. if isinstance(value, basestring) and len(value) > 100:
  42. args.append(key + "=" + repr(value[:97] + "..."))
  43. else:
  44. args.append(key + "=" + repr(value))
  45. return "{0}({1})".format(type(self).__name__, ", ".join(args))
  46. def __eq__(self, other):
  47. if isinstance(other, type(self)):
  48. return self._kwargs == other._kwargs
  49. return False
  50. def __getattr__(self, key):
  51. return self._kwargs[key]
  52. def __setattr__(self, key, value):
  53. self._kwargs[key] = value
  54. def __delattr__(self, key):
  55. del self._kwargs[key]
  56. def make(name):
  57. """Create a new Token class using ``type()`` and add it to ``__all__``."""
  58. __all__.append(name)
  59. return type(name if py3k else name.encode("utf8"), (Token,), {})
# Concrete token types. Each call to make() creates a Token subclass and
# appends its name to __all__, so the order below is the order of __all__.
# The trailing comments show the wikicode characters a token stands for.

Text = make("Text")

# Templates
TemplateOpen = make("TemplateOpen")                      # {{
TemplateParamSeparator = make("TemplateParamSeparator")  # |
TemplateParamEquals = make("TemplateParamEquals")        # =
TemplateClose = make("TemplateClose")                    # }}

# HTML entities
HTMLEntityStart = make("HTMLEntityStart")                # &
HTMLEntityNumeric = make("HTMLEntityNumeric")            # #
HTMLEntityHex = make("HTMLEntityHex")                    # x
HTMLEntityEnd = make("HTMLEntityEnd")                    # ;

# Headings
HeadingStart = make("HeadingStart")                      # =...
HeadingEnd = make("HeadingEnd")                          # =...

# Tags
TagOpenOpen = make("TagOpenOpen")                        # <
TagAttrStart = make("TagAttrStart")
TagAttrEquals = make("TagAttrEquals")                    # =
TagAttrQuote = make("TagAttrQuote")                      # "
TagCloseOpen = make("TagCloseOpen")                      # >
TagCloseSelfclose = make("TagCloseSelfclose")            # />
TagOpenClose = make("TagOpenClose")                      # </
TagCloseClose = make("TagCloseClose")                    # >

# make() is only needed while this module is being set up; drop it from the
# module namespace once all token classes exist.
del make